File size: 5,004 Bytes
78163ee |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
import gradio as gr
import re
from difflib import Differ
from src.translation_agent.utils import *
# Display name (as shown in the language dropdowns) -> English language name.
LANGUAGES = {
    "English": "English",
    "Español": "Spanish",
    "Français": "French",
    "Deutsch": "German",
    "Italiano": "Italian",
    "Português": "Portuguese",
    "Русский": "Russian",
    "中文": "Chinese",
    "日本語": "Japanese",
    "한국어": "Korean",
    "العربية": "Arabic",
    "हिन्दी": "Hindi",
}
def diff_texts(text1, text2, lang):
    """Return ``(text, label)`` pairs describing ``text2`` relative to ``text1``.

    The two texts are compared with ``difflib.Differ``. Pieces that are
    unchanged get the label ``None`` and pieces that only appear in ``text2``
    get the label ``"+"``. Pieces removed from ``text1`` (``"-"``) and Differ's
    ``"?"`` hint lines are dropped, so concatenating the returned text pieces
    reproduces ``text2``.

    Args:
        text1: The baseline string.
        text2: The revised string.
        lang: Target language name. Chinese and Japanese (by native or English
            name) are diffed character by character; every other language is
            diffed on whitespace-delimited tokens.

    Returns:
        A list of ``(piece, label)`` tuples, where ``label`` is ``"+"`` or
        ``None``.
    """
    ic(lang)

    # Chinese and Japanese are written without spaces between words, so the
    # whitespace tokenizer below would turn a whole sentence into one token
    # and flag it as entirely new after any small edit. Diff those by
    # character instead.
    char_level_langs = ("中文", "Chinese", "日本語", "Japanese")

    if lang in char_level_langs:
        units1, units2 = text1, text2
    else:
        # Keep whitespace runs as their own tokens so spacing survives the diff.
        token_pattern = re.compile(r'\S+|\s+')
        units1 = token_pattern.findall(text1)
        units2 = token_pattern.findall(text2)

    # Each Differ line is "<code> <payload>": the code is the first character,
    # the payload starts at index 2.
    return [
        (entry[2:], entry[0] if entry[0] != " " else None)
        for entry in Differ().compare(units1, units2)
        if entry[0] in ("+", " ")
    ]
def translate_text(source_lang, target_lang, source_text, country, max_tokens=MAX_TOKENS_PER_CHUNK):
    """Translate ``source_text`` in three stages, yielding progress after each.

    This is a generator. It yields a 3-tuple
    ``(initial_translation, reflection, translation_diff)`` three times:

    1. after the initial translation -> ``(translation, None, None)``
    2. after the reflection step     -> ``(translation, reflection, None)``
    3. after the improved translation -> ``(translation, reflection, diff)``

    The improved translation itself is not yielded; only its ``diff_texts``
    comparison against the initial translation is.

    Text whose token count (per ``num_tokens_in_string``) is below
    ``max_tokens`` goes through the ``one_chunk_*`` helpers. Anything larger is
    split with a ``RecursiveCharacterTextSplitter`` and goes through the
    ``multichunk_*`` helpers, with the per-chunk results joined by ``""``.

    Args:
        source_lang: Source language, forwarded to the translation helpers.
        target_lang: Target language, forwarded to the translation helpers and
            to ``diff_texts``.
        source_text: The text to translate.
        country: Forwarded to the reflection helpers.
        max_tokens: Threshold separating single-chunk from multi-chunk handling.
    """
    num_tokens_in_text = num_tokens_in_string(source_text)
    ic(num_tokens_in_text)

    if num_tokens_in_text >= max_tokens:
        ic("Translating text as multiple chunks")

        token_size = calculate_chunk_size(
            token_count=num_tokens_in_text,
            token_limit=max_tokens,
        )
        ic(token_size)

        splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
            model_name="gpt-4",
            chunk_size=token_size,
            chunk_overlap=0,
        )
        chunks = splitter.split_text(source_text)

        # Stage 1: initial translation of every chunk.
        translation_1_chunks = multichunk_initial_translation(
            source_lang, target_lang, chunks
        )
        ic(translation_1_chunks)
        draft = "".join(translation_1_chunks)
        yield draft, None, None

        # Stage 2: reflection on each translated chunk.
        reflection_chunks = multichunk_reflect_on_translation(
            source_lang, target_lang, chunks, translation_1_chunks, country
        )
        ic(reflection_chunks)
        notes = "".join(reflection_chunks)
        yield draft, notes, None

        # Stage 3: improved translation, reported as a diff against stage 1.
        translation_2_chunks = multichunk_improve_translation(
            source_lang, target_lang, chunks, translation_1_chunks, reflection_chunks
        )
        ic(translation_2_chunks)
        improved = "".join(translation_2_chunks)
        yield draft, notes, diff_texts(draft, improved, target_lang)
        return

    ic("Translating text as single chunk")
    # If any helper below is ever turned into a generator itself, delegate to
    # it with `yield from helper(...)` instead of a plain call.

    # Stage 1: initial translation.
    draft = one_chunk_initial_translation(source_lang, target_lang, source_text)
    yield draft, None, None

    # Stage 2: reflection on the initial translation.
    notes = one_chunk_reflect_on_translation(
        source_lang, target_lang, source_text, draft, country
    )
    yield draft, notes, None

    # Stage 3: improved translation, reported as a diff against stage 1.
    improved = one_chunk_improve_translation(
        source_lang, target_lang, source_text, draft, notes
    )
    yield draft, notes, diff_texts(draft, improved, target_lang)
def update_ui(translation_1, reflection, translation_diff):
    """Wrap each argument in ``gr.update(value=...)``.

    Returns:
        A 3-tuple of update objects, in the same order as the arguments.
    """
    values = (translation_1, reflection, translation_diff)
    return tuple(gr.update(value=item) for item in values)
# Page layout and event wiring for the translation demo.
# NOTE(review): the Row groupings below are the assumed layout (language
# pickers and country in one row, initial translation and reflection in
# another) -- confirm against the running UI.
with gr.Blocks() as demo:
    gr.Markdown("# Andrew Ng's Translation Agent ")

    # Inputs.
    with gr.Row():
        source_lang = gr.Dropdown(
            label="Source Language",
            choices=list(LANGUAGES),
            value="English",
        )
        target_lang = gr.Dropdown(
            label="Target Language",
            choices=list(LANGUAGES),
            value="中文",
        )
        country = gr.Textbox(label="Country (for target language)")
    source_text = gr.Textbox(label="Source Text", lines=5, show_copy_button=True)
    btn = gr.Button("Translate")

    # Outputs.
    with gr.Row():
        translation_1 = gr.Textbox(label="Initial Translation", lines=3)
        reflection = gr.Textbox(label="Reflection", lines=3)
    # Final translation rendered as a diff; pieces labelled "+" are shown in red.
    translation_diff = gr.HighlightedText(
        label="Final Translation",
        combine_adjacent=True,
        show_legend=True,
        color_map={"+": "red"},
    )

    # translate_text is a generator, so each of its yields updates the three
    # output components in turn.
    btn.click(
        translate_text,
        inputs=[source_lang, target_lang, source_text, country],
        outputs=[translation_1, reflection, translation_diff],
        queue=True,
    )
    # NOTE(review): this second listener feeds the three output components
    # back into themselves through update_ui. The listener above already
    # writes to them, so this looks redundant -- confirm before removing.
    btn.click(
        update_ui,
        inputs=[translation_1, reflection, translation_diff],
        outputs=[translation_1, reflection, translation_diff],
        queue=True,
    )

demo.launch()
|