Spaces:

BugZoid
/

text-humanizer

Sleeping

App Files Files Community

BugZoid committed on Jan 11

Commit

aeb2715

verified ·

1 Parent(s): 4739dfa

Update app.py

Browse files

Files changed (1) hide show

app.py +120 -62

app.py CHANGED Viewed

@@ -1,72 +1,110 @@
 import streamlit as st
 from transformers import T5ForConditionalGeneration, T5Tokenizer
-# Initialize session state for the model if not already done: load the
-# "t5-base" tokenizer and model, store both in st.session_state, and set the
-# 'model_loaded' flag so this block is skipped while the flag is present.
-if 'model_loaded' not in st.session_state:
-    st.session_state.tokenizer = T5Tokenizer.from_pretrained("t5-base")
-    st.session_state.model = T5ForConditionalGeneration.from_pretrained("t5-base")
-    st.session_state.model_loaded = True
-def clean_generated_text(text):
-    """Strip echoed prompt commands from generated text and capitalize it.
-
-    Args:
-        text: Raw decoded model output.
-
-    Returns:
-        The text with surrounding whitespace and every leading command
-        prefix removed, and with its first character upper-cased. An empty
-        result is returned as an empty string.
-    """
-    # Instruction fragments that may be echoed at the start of the output.
-    prefixes = (
-        "reescreva o seguinte texto",
-        "reescreva este texto",
-        "reescreva o texto",
-        "traduza",
-        "humanize:",
-        "humanizar:",
-        "em português",
-        "de forma mais natural",
-    )
-    text = text.strip()
-    # Rescan until nothing matches: a single pass misses a prefix that only
-    # becomes the start of the text after a later entry in the list has been
-    # stripped (e.g. "humanize: reescreva o texto ...").
-    found = True
-    while found:
-        found = False
-        for prefix in prefixes:
-            # Compare a slice of the same length as the prefix so the cut
-            # below removes exactly the characters that were matched.
-            if text[:len(prefix)].lower() == prefix:
-                text = text[len(prefix):].strip()
-                found = True
-    # Capitalize only the first character; the rest is left untouched.
-    if text:
-        text = text[0].upper() + text[1:]
-    return text
-def humanize_text(text):
-    """Rewrite text with the session's T5 model, keeping at least its word count.
-
-    Args:
-        text: The text to rewrite.
-
-    Returns:
-        The cleaned model output. When it has fewer whitespace-separated
-        words than ``text``, the trailing words of ``text`` are appended
-        until the counts match.
-    """
-    source_words = text.split()
-    tokenizer = st.session_state.tokenizer
-    encoded = tokenizer(
-        f"reescreva em português natural, mantendo todas as informações: {text}",
-        return_tensors="pt",
-        max_length=1024,
-        truncation=True,
-    )
-    # Generation settings; the notes are the original author's stated intent.
-    generation_options = dict(
-        max_length=1024,
-        min_length=len(source_words),
-        do_sample=True,
-        temperature=0.1,         # lowered for more coherent output
-        top_p=0.95,              # tuned for better word selection
-        num_beams=3,             # lowered for speed
-        repetition_penalty=1.2,
-        length_penalty=2.0,      # keeps an incentive for longer text
-    )
-    generated = st.session_state.model.generate(encoded.input_ids, **generation_options)
-    humanized = clean_generated_text(
-        tokenizer.decode(generated[0], skip_special_tokens=True)
-    )
-    # Enforce the minimum size by topping up with the last words of the input.
-    missing = len(source_words) - len(humanized.split())
-    while missing > 0:
-        humanized += " " + " ".join(source_words[-missing:])
-        missing = len(source_words) - len(humanized.split())
-    return humanized
 # UI Components
 st.set_page_config(page_title="Advanced Text Humanizer", page_icon="🤖")
@@ -74,18 +112,17 @@ st.set_page_config(page_title="Advanced Text Humanizer", page_icon="🤖")
 st.title("🤖 → 🧑 Humanizador de Texto Avançado")
 st.markdown("""
 Este aplicativo transforma textos robotizados em linguagem mais natural e humana,
-mantendo todas as informações originais e garantindo que o texto final seja pelo menos
-do mesmo tamanho que o original.
 """)
-# Input area with expanded capabilities
 input_text = st.text_area(
     "Cole seu texto de robô aqui:",
     height=150,
     help="Cole seu texto aqui para transformá-lo em uma versão mais natural e humana."
 )
-# Process button
 if st.button("Humanizar", type="primary"):
     if not input_text:
         st.warning("⚠️ Por favor, cole um texto primeiro!")
@@ -108,8 +145,29 @@ if st.button("Humanizar", type="primary"):
                     st.info(final_text)
                     st.write(f"Palavras: {len(final_text.split())}")
             except Exception as e:
                 st.error(f"❌ Erro no processamento: {str(e)}")
 # Footer
 st.markdown("---")
 st.markdown(

 import streamlit as st
 from transformers import T5ForConditionalGeneration, T5Tokenizer
+import torch
+from torch.utils.data import Dataset, DataLoader
+import json
+import os
+from datetime import datetime
+# Custom dataset for fine-tuning
+class TextHumanizerDataset(Dataset):
+    """Dataset of (input_text, output_text) pairs tokenized for fine-tuning.
+
+    Args:
+        data: Indexable collection of mappings with 'input_text' and
+            'output_text' entries.
+        tokenizer: Callable that accepts the keyword arguments used in
+            ``__getitem__`` and returns a mapping with 'input_ids' and
+            'attention_mask' tensors.
+        max_length: Length every sequence is padded or truncated to.
+    """
+    def __init__(self, data, tokenizer, max_length=512):
+        self.data = data
+        self.tokenizer = tokenizer
+        self.max_length = max_length
+    def __len__(self):
+        """Return the number of examples."""
+        return len(self.data)
+    def __getitem__(self, idx):
+        """Return the tokenized example at ``idx``.
+
+        Returns:
+            A dict with 'input_ids', 'attention_mask' and 'labels', each with
+            the batch dimension added by the tokenizer removed. Padded
+            positions in 'labels' are set to -100.
+        """
+        item = self.data[idx]
+        input_encoding = self.tokenizer(
+            f"reescreva em português natural, mantendo todas as informações: {item['input_text']}",
+            max_length=self.max_length,
+            padding='max_length',
+            truncation=True,
+            return_tensors='pt'
+        )
+        target_encoding = self.tokenizer(
+            item['output_text'],
+            max_length=self.max_length,
+            padding='max_length',
+            truncation=True,
+            return_tensors='pt'
+        )
+        # Padding in the target must not contribute to the loss: -100 is the
+        # label value the loss ignores, so mask every padded position.
+        # Clone first so the tokenizer's own tensor is not modified.
+        labels = target_encoding['input_ids'].squeeze(0).clone()
+        labels[target_encoding['attention_mask'].squeeze(0) == 0] = -100
+        # squeeze(0) drops only the batch dimension; a bare squeeze() would
+        # also collapse the sequence dimension when max_length is 1.
+        return {
+            'input_ids': input_encoding['input_ids'].squeeze(0),
+            'attention_mask': input_encoding['attention_mask'].squeeze(0),
+            'labels': labels
+        }
+def save_feedback(input_text, output_text, rating):
+    """Append one user feedback record to the feedback log for later training.
+
+    The record holds the input text, the output text, the rating and the
+    current local time in ISO format. It is written as a single JSON line to
+    'feedback_data/feedback.json'; the directory is created when missing.
+    """
+    record = {
+        'input_text': input_text,
+        'output_text': output_text,
+        'rating': rating,
+        'timestamp': datetime.now().isoformat(),
+    }
+    # Make sure the target directory exists before opening the log.
+    os.makedirs('feedback_data', exist_ok=True)
+    # Append mode: each call adds one line and keeps earlier records.
+    with open('feedback_data/feedback.json', 'a') as log_file:
+        log_file.write(f"{json.dumps(record)}\n")
+def fine_tune_model():
+    """Fine-tune the session model on positively rated feedback.
+
+    Reads 'feedback_data/feedback.json' (one JSON object per line), keeps the
+    entries rated 4 or higher, and runs one pass of AdamW updates over them
+    on the model stored in Streamlit session state. Returns without doing
+    anything when the file is missing or holds no positive entry.
+
+    Raises:
+        json.JSONDecodeError: If a non-blank line is not valid JSON.
+        KeyError: If a record lacks one of the expected keys.
+    """
+    feedback_path = 'feedback_data/feedback.json'
+    if not os.path.exists(feedback_path):
+        return
+    # Load feedback records, keeping only the positive ones.
+    positive_examples = []
+    with open(feedback_path, 'r') as f:
+        for line in f:
+            # Skip blank lines (e.g. left by a manual edit) instead of
+            # failing inside json.loads.
+            if not line.strip():
+                continue
+            feedback = json.loads(line)
+            if feedback['rating'] >= 4:
+                positive_examples.append({
+                    'input_text': feedback['input_text'],
+                    'output_text': feedback['output_text']
+                })
+    if not positive_examples:
+        return
+    # Build the dataset and dataloader.
+    dataset = TextHumanizerDataset(positive_examples, st.session_state.tokenizer)
+    dataloader = DataLoader(dataset, batch_size=4, shuffle=True)
+    model = st.session_state.model
+    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
+    model.train()
+    try:
+        for batch in dataloader:
+            optimizer.zero_grad()
+            outputs = model(
+                input_ids=batch['input_ids'],
+                attention_mask=batch['attention_mask'],
+                labels=batch['labels']
+            )
+            loss = outputs.loss
+            loss.backward()
+            optimizer.step()
+    finally:
+        # Always switch back to evaluation mode, even when a training step
+        # raises, so the model is not left in training mode for later
+        # generation.
+        model.eval()
+# Initialize session state: load the "t5-base" tokenizer and model, store
+# both in st.session_state, and set the 'model_loaded' flag so this block is
+# skipped while the flag is present.
+if 'model_loaded' not in st.session_state:
+    st.session_state.tokenizer = T5Tokenizer.from_pretrained("t5-base")
+    st.session_state.model = T5ForConditionalGeneration.from_pretrained("t5-base")
+    st.session_state.model_loaded = True
+# Rest of your existing functions (clean_generated_text and humanize_text remain the same)
+# [Previous clean_generated_text and humanize_text functions remain unchanged]
 # UI Components
 st.set_page_config(page_title="Advanced Text Humanizer", page_icon="🤖")
 st.title("🤖 → 🧑 Humanizador de Texto Avançado")
 st.markdown("""
 Este aplicativo transforma textos robotizados em linguagem mais natural e humana,
+mantendo todas as informações originais e incluindo sistema de feedback para melhoria contínua.
 """)
+# Input area
 input_text = st.text_area(
     "Cole seu texto de robô aqui:",
     height=150,
     help="Cole seu texto aqui para transformá-lo em uma versão mais natural e humana."
 )
+# Process button and results
 if st.button("Humanizar", type="primary"):
     if not input_text:
         st.warning("⚠️ Por favor, cole um texto primeiro!")
                     st.info(final_text)
                     st.write(f"Palavras: {len(final_text.split())}")
+                # Feedback section
+                st.markdown("### Feedback")
+                rating = st.slider(
+                    "Como você avalia a qualidade do texto humanizado?",
+                    min_value=1,
+                    max_value=5,
+                    value=3,
+                    help="1 = Muito ruim, 5 = Excelente"
+                )
+                if st.button("Enviar Feedback"):
+                    save_feedback(input_text, final_text, rating)
+                    st.success("Feedback salvo com sucesso! Obrigado pela contribuição.")
+                    # Trigger fine-tuning if we have enough positive feedback
+                    if rating >= 4:
+                        with st.spinner("Atualizando modelo com seu feedback..."):
+                            fine_tune_model()
+                        st.success("Modelo atualizado com sucesso!")
             except Exception as e:
                 st.error(f"❌ Erro no processamento: {str(e)}")
 # Footer
 st.markdown("---")
 st.markdown(