demo / app.py
tinchex37's picture
Update app.py
c14f240 verified
# import streamlit as st
# from transformers import pipeline
# # pipe = pipeline('sentiment-analysis')
# text = st.text_area('enter text: ')
# generator = pipeline("text-generation", model="EleutherAI/gpt-neo-2.7B")
# technical_text = """
# The CRISPR-Cas9 system enables precise genome editing by creating double-strand breaks at specific DNA locations, facilitating targeted genetic modifications.
# """
# # Prompt for the transformation
# prompt = f"Rewrite the following technical text in simple terms for a general audience:\n\n{text}\n\nSimplified version:"
# # Generate the transformed text
# result = generator(
# prompt,
# max_length=256,
# num_return_sequences=1,
# do_sample=True,
# temperature=0.1,
# top_p=0.9,
# repetition_penalty=1.1,
# )
# print(result[0]['generated_text'])
# if text:
# out = pipe(text)
# st.json(out)
from transformers import pipeline
import json
# Step 1: Rewrite the technical text in more accessible language with a T5 model.
# NOTE(review): the example input in this file is Spanish; confirm that the
# "t5-small" checkpoint produces usable summaries for Spanish text.
simplifier = pipeline("summarization", model="t5-small")


def simplify_text(text, max_length=100, min_length=50):
    """Return a shortened rewrite of ``text`` produced by the summarizer.

    Args:
        text: The text to simplify.
        max_length: Upper bound on the generated length, forwarded to the
            pipeline unchanged.
        min_length: Lower bound on the generated length, forwarded to the
            pipeline unchanged. Lower it for short inputs so the model is
            not forced to generate more text than it was given.

    Returns:
        The ``summary_text`` field of the first pipeline result.
    """
    result = simplifier(
        text,
        max_length=max_length,
        min_length=min_length,
        do_sample=False,  # sampling disabled, as in the original call
    )
    return result[0]["summary_text"]
# Step 2: Translate from Spanish into English, Arabic and French (MarianMT models).
translator_en = pipeline("translation_es_to_en", model="Helsinki-NLP/opus-mt-es-en")
translator_ar = pipeline("translation_es_to_ar", model="Helsinki-NLP/opus-mt-es-ar")
translator_fr = pipeline("translation_es_to_fr", model="Helsinki-NLP/opus-mt-es-fr")


def translate_text(text):
    """Translate ``text`` with each of the three translation pipelines.

    Returns:
        A dict with the keys ``"english"``, ``"arabic"`` and ``"french"``,
        each mapped to the ``translation_text`` of that pipeline's first result.
    """
    # Pair every output key with its pipeline; the translators run in this order.
    targets = (
        ("english", translator_en),
        ("arabic", translator_ar),
        ("french", translator_fr),
    )
    return {
        language: translate(text)[0]["translation_text"]
        for language, translate in targets
    }
# Step 3: Identify the main topic with zero-shot classification.
# The zero-shot pipeline scores each candidate label through a natural language
# inference (NLI) model, so it needs a checkpoint fine-tuned for NLI. The SST-2
# sentiment checkpoint used previously only distinguishes positive/negative and
# has no entailment output, which made the label ranking meaningless.
# A multilingual NLI checkpoint is chosen because the example input in this
# file is Spanish; a smaller NLI checkpoint can be substituted if size matters.
classifier = pipeline("zero-shot-classification", model="joeddav/xlm-roberta-large-xnli")
labels = ["Technology", "Science", "Health", "Business", "Education", "Other"]


def identify_topic(text):
    """Return the candidate label the zero-shot classifier ranks first.

    Args:
        text: The text to classify against the module-level ``labels``.

    Returns:
        The first entry of the ``labels`` list in the pipeline result, which
        the pipeline orders from most to least likely.
    """
    classification = classifier(text, candidate_labels=labels)
    return classification["labels"][0]  # Main topic
# Step 4: Detect the tone of the text with a RoBERTa-based sentiment model.
# "roberta-base" is only the pretrained language model: loading it for
# sentiment analysis attaches a randomly initialised classification head, so
# its predictions are arbitrary. A checkpoint fine-tuned for sentiment is
# required; this one is multilingual, matching the Spanish example input.
tone_analyzer = pipeline(
    "sentiment-analysis",
    model="cardiffnlp/twitter-xlm-roberta-base-sentiment",
)


def detect_tone(text):
    """Return the sentiment label predicted for ``text``.

    Args:
        text: The text whose tone should be classified.

    Returns:
        The ``label`` field of the first pipeline result. The exact label
        strings come from the model's configuration (expected to be
        negative / neutral / positive — confirm the casing against the
        model card before matching on them).
    """
    tone_result = tone_analyzer(text)[0]
    return tone_result["label"]
# Step 5: Assemble all results into the payload for the web service.
def process_text_for_web_service(text):
    """Run the full pipeline on ``text`` and return the result as a JSON string.

    The text is simplified first; translation, topic identification and tone
    detection are then all run on the simplified version, in that order.

    Returns:
        A JSON document (4-space indent, non-ASCII characters kept as-is) with
        the keys ``original_text``, ``simplified_text``, ``translations``,
        ``main_topic`` and ``tone``.
    """
    simplified = simplify_text(text)
    # Dict values are evaluated top to bottom, so the helpers run in the
    # order translate -> topic -> tone.
    report = {
        "original_text": text,
        "simplified_text": simplified,
        "translations": translate_text(simplified),
        "main_topic": identify_topic(simplified),
        "tone": detect_tone(simplified),
    }
    return json.dumps(report, ensure_ascii=False, indent=4)
# Example input text (in Spanish).
# The accented characters had been corrupted by an encoding round-trip
# (e.g. "est谩" for "está"); they are restored here so the models receive
# valid Spanish.
input_text = (
    "La inteligencia artificial (IA) está revolucionando la industria de la "
    "tecnología al permitir nuevas aplicaciones en múltiples campos, desde la "
    "salud hasta la educación."
)
# Run the whole pipeline on the example text.
formatted_output = process_text_for_web_service(input_text)
# Print the JSON-formatted result.
print(formatted_output)