# Spaces: Sleeping  (hosting-page status text captured with this listing; not part of the program)
import gradio as gr | |
import re | |
from difflib import Differ | |
from src.translation_agent.utils import * | |
# Languages offered in the UI: native display name -> English name.
# Insertion order is the order the choices are listed in.
LANGUAGES = {
    "English": "English",
    "Español": "Spanish",
    "Français": "French",
    "Deutsch": "German",
    "Italiano": "Italian",
    "Português": "Portuguese",
    "Русский": "Russian",
    "中文": "Chinese",
    "日本語": "Japanese",
    "한국어": "Korean",
    "العربية": "Arabic",
    "हिन्दी": "Hindi",
}
def diff_texts(text1, text2, lang):
    """Diff two texts and return the second one as labelled segments.

    Args:
        text1: The baseline text.
        text2: The revised text.
        lang: Language of the texts, used only to choose the diff
            granularity (per character or per whitespace-delimited run).

    Returns:
        A list of ``(segment, label)`` tuples covering ``text2`` in order.
        ``label`` is ``"+"`` for segments that are new in ``text2`` and
        ``None`` for segments common to both texts. Segments that exist only
        in ``text1`` are left out.
    """
    ic(lang)

    # Chinese and Japanese are written without spaces between words, so the
    # whitespace tokenizer below would collapse a whole sentence into a single
    # token and a one-character edit would highlight the entire sentence.
    # Diff those languages character by character instead. Both the native
    # names and their English names are accepted.
    char_level_langs = ('中文', 'Chinese', '日本語', 'Japanese')
    if lang in char_level_langs:
        seq1, seq2 = text1, text2
    else:
        # Alternate runs of non-whitespace and whitespace so that joining the
        # segments reproduces the original spacing exactly.
        seq1 = re.findall(r'\S+|\s+', text1)
        seq2 = re.findall(r'\S+|\s+', text2)

    # Differ prefixes each item with a two-character code: "  " common,
    # "+ " added, "- " removed, "? " hint. Only common and added items are
    # kept, and the prefix is stripped from the segment text.
    return [
        (token[2:], None if token[0] == " " else token[0])
        for token in Differ().compare(seq1, seq2)
        if token[0] in ("+", " ")
    ]
def translate_text(source_lang, target_lang, source_text, country, max_tokens=MAX_TOKENS_PER_CHUNK):
    """Run translate -> reflect -> improve and stream the intermediate results.

    This is a generator. It yields a 3-tuple three times so a caller can show
    progress as each stage finishes:

    1. ``(initial_translation, None, None)``
    2. ``(initial_translation, reflection, None)``
    3. ``(initial_translation, reflection, diff)`` where ``diff`` is
       ``diff_texts(initial_translation, improved_translation, target_lang)``.

    Text whose token count is below ``max_tokens`` is handled in one piece;
    otherwise it is split into chunks and the multichunk helpers are used.

    NOTE(review): ``source_lang`` / ``target_lang`` are forwarded to the
    helpers exactly as received; ``LANGUAGES`` is not consulted here.
    """
    num_tokens_in_text = num_tokens_in_string(source_text)
    ic(num_tokens_in_text)

    if num_tokens_in_text < max_tokens:
        ic("Translating text as single chunk")
        # If a helper ever becomes a generator itself, delegate to it with
        # `yield from helper(...)` rather than calling it.
        first_draft = one_chunk_initial_translation(source_lang, target_lang, source_text)
        yield first_draft, None, None

        critique = one_chunk_reflect_on_translation(
            source_lang, target_lang, source_text, first_draft, country
        )
        yield first_draft, critique, None

        improved = one_chunk_improve_translation(
            source_lang, target_lang, source_text, first_draft, critique
        )
        highlighted = diff_texts(first_draft, improved, target_lang)
        yield first_draft, critique, highlighted
        return

    # Multi-chunk path: the text does not fit in a single request.
    ic("Translating text as multiple chunks")
    token_size = calculate_chunk_size(token_count=num_tokens_in_text, token_limit=max_tokens)
    ic(token_size)

    splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        model_name="gpt-4",
        chunk_size=token_size,
        chunk_overlap=0,
    )
    pieces = splitter.split_text(source_text)

    translation_1_chunks = multichunk_initial_translation(source_lang, target_lang, pieces)
    ic(translation_1_chunks)
    first_draft = "".join(translation_1_chunks)
    yield first_draft, None, None

    reflection_chunks = multichunk_reflect_on_translation(
        source_lang, target_lang, pieces, translation_1_chunks, country
    )
    ic(reflection_chunks)
    critique = "".join(reflection_chunks)
    yield first_draft, critique, None

    translation_2_chunks = multichunk_improve_translation(
        source_lang, target_lang, pieces, translation_1_chunks, reflection_chunks
    )
    ic(translation_2_chunks)
    improved = "".join(translation_2_chunks)
    highlighted = diff_texts(first_draft, improved, target_lang)
    yield first_draft, critique, highlighted
def update_ui(translation_1, reflection, translation_diff):
    """Wrap each of the three values in ``gr.update(value=...)``.

    Returns the three update objects as a tuple, in argument order.
    """
    initial_update = gr.update(value=translation_1)
    reflection_update = gr.update(value=reflection)
    diff_update = gr.update(value=translation_diff)
    return initial_update, reflection_update, diff_update
# Page layout and event wiring for the translation demo.
# NOTE(review): which components sit inside each gr.Row() was reconstructed
# from a listing that had lost its indentation -- confirm against the
# intended layout.
with gr.Blocks() as demo:
    gr.Markdown("# Andrew Ng's Translation Agent ")

    # Inputs: language pair, optional target country, and the text itself.
    with gr.Row():
        source_lang = gr.Dropdown(choices=list(LANGUAGES.keys()), value='English', label="Source Language")
        target_lang = gr.Dropdown(choices=list(LANGUAGES.keys()), value='中文', label="Target Language")
        country = gr.Textbox(label="Country (for target language)")
    source_text = gr.Textbox(label="Source Text", lines=5, show_copy_button=True)
    btn = gr.Button("Translate")

    # Outputs: first draft, the critique, and the final text with the parts
    # added by the improvement step highlighted under the "+" label.
    with gr.Row():
        translation_1 = gr.Textbox(label="Initial Translation", lines=3)
        reflection = gr.Textbox(label="Reflection", lines=3)
        translation_diff = gr.HighlightedText(
            label="Final Translation",
            combine_adjacent=True,
            show_legend=True,
            color_map={"+": "red"},
        )

    # translate_text is a generator: every tuple it yields is written to the
    # three output components, so this single listener is all that is needed
    # to refresh them. A listener that merely copies the outputs back onto
    # themselves must not be attached to the same click -- it adds nothing and
    # can overwrite freshly streamed results with stale values.
    btn.click(
        translate_text,
        inputs=[source_lang, target_lang, source_text, country],
        outputs=[translation_1, reflection, translation_diff],
        queue=True,
    )

demo.launch()