lossLopes committed on
Commit
6596a58
·
1 Parent(s): 8bfb75b

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -65
app.py DELETED
@@ -1,65 +0,0 @@
1
# Third-party dependencies for the Streamlit summarization app.
import nltk
import streamlit as st
import transformers
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Sentence tokenizer data required by nltk.tokenize.sent_tokenize.
nltk.download('punkt', force=True)
9
def fragment_text(text, tokenizer):
    """Split *text* into sentence-aligned chunks that fit the model's input limit.

    Sentences are grouped greedily: a sentence is appended to the current
    chunk while the chunk's *token* count stays within
    ``tokenizer.max_len_single_sentence``; otherwise a new chunk starts.

    Args:
        text: The raw text to split.
        tokenizer: A Hugging Face tokenizer exposing ``tokenize`` and
            ``max_len_single_sentence``.

    Returns:
        list[str]: Non-empty, whitespace-stripped chunks covering all
        sentences of *text* in order. Empty input yields an empty list.
    """
    sentences = nltk.tokenize.sent_tokenize(text)
    max_len = tokenizer.max_len_single_sentence

    chunks = []
    chunk = ""
    # BUG FIX: the original compared the sentence's token count against the
    # chunk's *character* count (len(chunk)), so the limit check was
    # meaningless. Track the chunk's token count explicitly instead.
    chunk_token_count = 0

    for sentence in sentences:
        sentence_token_count = len(tokenizer.tokenize(sentence))

        if chunk_token_count + sentence_token_count <= max_len:
            chunk += sentence + " "
            chunk_token_count += sentence_token_count
        else:
            chunks.append(chunk.strip())
            chunk = sentence + " "
            chunk_token_count = sentence_token_count

    # Flush the trailing partial chunk, if any.
    if chunk != "":
        chunks.append(chunk.strip())

    return chunks
32
-
33
def summarize_text(text):
    """Summarize *text* by summarizing each model-sized chunk and joining the results.

    Relies on the module-level ``tokenizer`` and ``model`` globals defined
    further down in this file.

    Args:
        text: The text to summarize.

    Returns:
        str: The chunk summaries joined with single spaces; an empty string
        when *text* produces no chunks.
    """
    chunks = fragment_text(text, tokenizer)

    summaries = []
    for chunk in chunks:
        # Renamed from `input`, which shadowed the builtin.
        inputs = tokenizer(chunk, return_tensors='pt')
        output = model.generate(**inputs)
        # `generate` returns a batch; decode the single sequence explicitly
        # instead of unpacking the batch with `*output` (which would pass
        # extra positional args if the batch ever held more than one row).
        summary = tokenizer.decode(output[0], skip_special_tokens=True)
        summaries.append(summary)

    return " ".join(summaries)
46
-
47
# Import the summarization model classes (duplicate of the top-of-file
# import; kept for safety since only part of the file may be visible).
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Load the summarization model; these module-level globals are used by
# summarize_text().
checkpoint = "tclopess/bart_samsum"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

# Input field for the text to summarize.
text_input = st.text_area("Cola ou digite o texto a ser resumido:")

# Trigger button.
button = st.button("Resumo")

# Run the summarization when the button is clicked.
if button:
    summary = summarize_text(text_input)
    st.write("Resumo:")
    # BUG FIX: the original wrote `summaries`, a name that exists only as a
    # local inside summarize_text(), raising NameError at runtime. Write the
    # returned summary instead.
    st.write(summary)