"""Streamlit app that summarizes text with a BART model (tclopess/bart_samsum).

The input text is split into chunks of whole sentences that fit within the
tokenizer's single-sentence token limit; each chunk is summarized separately
and the partial summaries are joined into the final summary.
"""

import nltk
import streamlit as st
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Sentence-splitter data required by nltk.tokenize.sent_tokenize.
nltk.download('punkt')

# Summarization model checkpoint (BART fine-tuned on the SAMSum dataset).
checkpoint = "tclopess/bart_samsum"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)


def fragment_text(text, tokenizer):
    """Split *text* into chunks of whole sentences that fit the model limit.

    Greedily packs consecutive sentences while the accumulated **token**
    count stays within ``tokenizer.max_len_single_sentence``.

    Note: the original compared token counts against ``len(chunk)`` — a
    *character* count — so chunks could exceed the model's token limit.
    This version tracks the chunk's token count explicitly.

    A single sentence longer than the limit still becomes its own
    (oversized) chunk; truncation is left to the tokenizer/model.

    Returns a list of non-empty chunk strings (empty list for empty input).
    """
    sentences = nltk.tokenize.sent_tokenize(text)
    max_len = tokenizer.max_len_single_sentence
    chunks = []
    chunk = ""
    chunk_tokens = 0  # running token count of `chunk` (avoids re-tokenizing)
    for sentence in sentences:
        sentence_tokens = len(tokenizer.tokenize(sentence))
        # Start a new chunk when adding this sentence would overflow.
        if chunk and chunk_tokens + sentence_tokens > max_len:
            chunks.append(chunk.strip())
            chunk = ""
            chunk_tokens = 0
        chunk += sentence + " "
        chunk_tokens += sentence_tokens
    if chunk:
        chunks.append(chunk.strip())
    return chunks


def summarize_text(text):
    """Summarize *text* chunk by chunk and join the partial summaries.

    Uses the module-level ``tokenizer`` and ``model``. Returns a single
    string; empty input yields an empty string.
    """
    chunks = fragment_text(text, tokenizer)
    summaries = []
    for chunk in chunks:
        # `inputs` (not `input`) avoids shadowing the builtin.
        inputs = tokenizer(chunk, return_tensors='pt')
        output = model.generate(**inputs)
        # generate() returns a batch of size 1; decode the first sequence.
        summary = tokenizer.decode(output[0], skip_special_tokens=True)
        summaries.append(summary)
    return " ".join(summaries)


text_input = st.text_area("Cole ou digite o texto a ser resumido:")
button = st.button("Resumo")

# Run the summarizer when the button is clicked and there is actual text.
if button:
    if text_input.strip():
        summary = summarize_text(text_input)
        st.write("Resumo:")
        st.write(summary)
    else:
        st.warning("Insira um texto para resumir.")