SoybeanMilk
committed on
Commit
•
5f1dbf2
1
Parent(s):
883ec42
Update app.py
Browse files
app.py
CHANGED
@@ -12,8 +12,8 @@ from tqdm import tqdm # Import tqdm
|
|
12 |
# Download necessary data for nltk
|
13 |
nltk.download('punkt')
|
14 |
|
15 |
-
OCR_TR_DESCRIPTION = '''# OCR Translate GeminiPro
|
16 |
-
<div id="content_align">OCR
|
17 |
|
18 |
# Getting the list of available languages for Tesseract
|
19 |
choices = os.popen('tesseract --list-langs').read().split('\n')[1:-1]
|
@@ -57,13 +57,13 @@ def cp_text(input_text):
|
|
57 |
def cp_clear():
|
58 |
pyperclip.clear()
|
59 |
|
60 |
-
# Split the text into
|
61 |
def process_text_input_text(input_text):
|
62 |
-
# Split the text into
|
63 |
-
chunks = [input_text[i:i+
|
64 |
return chunks
|
65 |
|
66 |
-
def process_and_translate(api_key, input_text,
|
67 |
# Process the input text into chunks
|
68 |
chunks = process_text_input_text(input_text)
|
69 |
|
@@ -73,34 +73,56 @@ def process_and_translate(api_key, input_text, inputs_transStyle):
|
|
73 |
if chunk is None or chunk == "":
|
74 |
translated_chunks.append("System prompt: There is no content to translate!")
|
75 |
else:
|
76 |
-
prompt = f"
|
77 |
genai.configure(api_key=api_key)
|
78 |
model = genai.GenerativeModel('gemini-pro')
|
79 |
response = model.generate_content([prompt, chunk],
|
80 |
generation_config=genai.types.GenerationConfig(
|
81 |
# Only one candidate for now.
|
82 |
candidate_count=1,
|
83 |
-
|
84 |
-
|
85 |
-
|
|
|
86 |
)
|
87 |
translated_chunks.append(response.text)
|
88 |
|
89 |
# Join the translated chunks back together into a single string
|
90 |
-
response = '
|
91 |
|
92 |
return response
|
93 |
|
94 |
-
|
95 |
-
#
|
96 |
-
|
97 |
-
|
98 |
-
#
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
104 |
|
105 |
def main():
|
106 |
|
@@ -138,11 +160,14 @@ def main():
|
|
138 |
with gr.Column():
|
139 |
with gr.Row():
|
140 |
outputs_text = gr.Textbox(label="Extract content", lines=20)
|
141 |
-
|
142 |
-
default="
|
|
|
|
|
143 |
with gr.Row():
|
144 |
clear_text_btn = gr.Button('Clear')
|
145 |
translate_btn = gr.Button(value='Translate', variant="primary")
|
|
|
146 |
|
147 |
|
148 |
with gr.Row():
|
@@ -152,10 +177,10 @@ def main():
|
|
152 |
with gr.Box():
|
153 |
|
154 |
with gr.Row():
|
155 |
-
gr.Markdown("### Step 02:
|
156 |
|
157 |
with gr.Row():
|
158 |
-
outputs_tr_text = gr.Textbox(label="
|
159 |
|
160 |
with gr.Row():
|
161 |
cp_clear_btn = gr.Button(value='Clear Clipboard')
|
@@ -167,7 +192,8 @@ def main():
|
|
167 |
clear_img_btn.click(fn=clear_content, inputs=[], outputs=[inputs_img])
|
168 |
|
169 |
# ---------------------- 翻译 ----------------------
|
170 |
-
translate_btn.click(fn=process_and_translate, inputs=[inputs_api_key, outputs_text,
|
|
|
171 |
clear_text_btn.click(fn=clear_content, inputs=[], outputs=[outputs_text])
|
172 |
|
173 |
# ---------------------- 复制到剪贴板 ----------------------
|
|
|
12 |
# Download necessary data for nltk
|
13 |
nltk.download('punkt')
|
14 |
|
15 |
+
OCR_TR_DESCRIPTION = '''# OCR Translate and Summary GeminiPro
|
16 |
+
<div id="content_align">OCR system based on Tesseract</div>'''
|
17 |
|
18 |
# Getting the list of available languages for Tesseract
|
19 |
choices = os.popen('tesseract --list-langs').read().split('\n')[1:-1]
|
|
|
57 |
def cp_clear():
    """Empty the system clipboard.

    pyperclip's public API provides ``copy``/``paste`` but no ``clear``
    function, so the clipboard is emptied by copying an empty string.
    """
    pyperclip.copy('')
|
59 |
|
60 |
+
def process_text_input_text(input_text, chunk_size=1500):
    """Split ``input_text`` into consecutive fixed-size character chunks.

    Args:
        input_text: The text to split. ``None`` is treated like an empty
            string so callers do not crash when no text was supplied.
        chunk_size: Maximum number of characters per chunk (default 1500).

    Returns:
        A list of substrings covering ``input_text`` in order; every chunk
        except possibly the last has exactly ``chunk_size`` characters.
        An empty or ``None`` input yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` is not a positive integer.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not input_text:
        return []
    return [
        input_text[start:start + chunk_size]
        for start in range(0, len(input_text), chunk_size)
    ]
|
65 |
|
66 |
+
def process_and_translate(api_key, input_text, src_lang, tgt_lang):
|
67 |
# Process the input text into chunks
|
68 |
chunks = process_text_input_text(input_text)
|
69 |
|
|
|
73 |
if chunk is None or chunk == "":
|
74 |
translated_chunks.append("System prompt: There is no content to translate!")
|
75 |
else:
|
76 |
+
prompt = f"This is an {src_lang} to {tgt_lang} translation, please provide the {tgt_lang} translation for this sentence. Do not provide any explanations or text apart from the translation.\n{src_lang}: "
|
77 |
genai.configure(api_key=api_key)
|
78 |
model = genai.GenerativeModel('gemini-pro')
|
79 |
response = model.generate_content([prompt, chunk],
|
80 |
generation_config=genai.types.GenerationConfig(
|
81 |
# Only one candidate for now.
|
82 |
candidate_count=1,
|
83 |
+
max_output_tokens=2048,
|
84 |
+
temperature=0.3,
|
85 |
+
top_p=1,
|
86 |
+
)
|
87 |
)
|
88 |
translated_chunks.append(response.text)
|
89 |
|
90 |
# Join the translated chunks back together into a single string
|
91 |
+
response = ''.join(translated_chunks)
|
92 |
|
93 |
return response
|
94 |
|
95 |
+
def process_and_summary(api_key, input_text, src_lang, tgt_lang):
    """Summarize ``input_text`` chunk by chunk with the 'gemini-pro' model.

    The text is split with ``process_text_input_text``; each chunk is sent
    together with a summarization prompt, and the per-chunk answers are
    joined with a separator line.

    Args:
        api_key: API key passed to ``genai.configure``.
        input_text: Text to summarize.
        src_lang: Name of the source language, interpolated into the prompt.
        tgt_lang: Name of the target language, interpolated into the prompt.

    Returns:
        The per-chunk responses joined by a line of ``=`` characters, or an
        empty string when there are no chunks.
    """
    # Process the input text into chunks
    chunks = process_text_input_text(input_text)
    if not chunks:
        # Joining an empty list gives '' anyway; return early so the API
        # client is not configured when there is nothing to send.
        return ''

    # The prompt, client setup and generation settings do not depend on the
    # chunk, so they are built once instead of on every iteration.
    prompt = f"This is an {src_lang} to {tgt_lang} summarization and knowledge key points, please provide the {tgt_lang} summarization and list the {tgt_lang} knowledge key points for this sentence. Do not provide any explanations or text apart from the summarization.\n{src_lang}: "
    genai.configure(api_key=api_key)
    model = genai.GenerativeModel('gemini-pro')
    generation_config = genai.types.GenerationConfig(
        # Only one candidate for now.
        candidate_count=1,
        max_output_tokens=2048,
        temperature=0.3,
        top_p=1,
    )

    # Summarize each chunk and collect the results
    summarized_chunks = []
    for chunk in chunks:
        if chunk is None or chunk == "":
            summarized_chunks.append("System prompt: There is no content to translate!")
            continue
        response = model.generate_content(
            [prompt, chunk],
            generation_config=generation_config,
        )
        summarized_chunks.append(response.text)

    # Join the summarized chunks back together into a single string
    return '\n==================================================\n'.join(summarized_chunks)
|
123 |
+
|
124 |
+
# prompt = f"Display language is {tgt_lang}, do not display original text, As a Knowledge Video Content Analysis Expert, specialize in analyzing knowledge videos, identifying and clearly explaining key points in {tgt_lang}, ensuring accurate, easy-to-understand summaries suitable for diverse audiences, analyze, list key points, and explain detailedly below text: "
|
125 |
+
|
126 |
|
127 |
def main():
|
128 |
|
|
|
160 |
with gr.Column():
|
161 |
with gr.Row():
|
162 |
outputs_text = gr.Textbox(label="Extract content", lines=20)
|
163 |
+
src_lang = gr.inputs.Dropdown(choices=["Chinese (Simplified)", "Chinese (Traditional)", "English", "Japanese", "Korean"],
|
164 |
+
default="English", label='source language')
|
165 |
+
tgt_lang = gr.inputs.Dropdown(choices=["Chinese (Simplified)", "Chinese (Traditional)", "English", "Japanese", "Korean"],
|
166 |
+
default="Chinese (Traditional)", label='target language')
|
167 |
with gr.Row():
|
168 |
clear_text_btn = gr.Button('Clear')
|
169 |
translate_btn = gr.Button(value='Translate', variant="primary")
|
170 |
+
summary_btn = gr.Button(value='Summary', variant="primary")
|
171 |
|
172 |
|
173 |
with gr.Row():
|
|
|
177 |
with gr.Box():
|
178 |
|
179 |
with gr.Row():
|
180 |
+
gr.Markdown("### Step 02: Process")
|
181 |
|
182 |
with gr.Row():
|
183 |
+
outputs_tr_text = gr.Textbox(label="Process Content", lines=20)
|
184 |
|
185 |
with gr.Row():
|
186 |
cp_clear_btn = gr.Button(value='Clear Clipboard')
|
|
|
192 |
clear_img_btn.click(fn=clear_content, inputs=[], outputs=[inputs_img])
|
193 |
|
194 |
# ---------------------- 翻译 ----------------------
|
195 |
+
translate_btn.click(fn=process_and_translate, inputs=[inputs_api_key, outputs_text, src_lang, tgt_lang], outputs=[outputs_tr_text])
|
196 |
+
summary_btn.click(fn=process_and_summary, inputs=[inputs_api_key, outputs_text, src_lang, tgt_lang], outputs=[outputs_tr_text])
|
197 |
clear_text_btn.click(fn=clear_content, inputs=[], outputs=[outputs_text])
|
198 |
|
199 |
# ---------------------- 复制到剪贴板 ----------------------
|