Spaces:

SoybeanMilk
/

OCR-Translate-and-Summary-GeminiPro

Sleeping

App Files Community

SoybeanMilk committed on Jan 16, 2024

Commit

5f1dbf2

•

1 Parent(s): 883ec42

Update app.py

Browse files

Files changed (1) hide show

app.py +52 -26

app.py CHANGED Viewed

@@ -12,8 +12,8 @@ from tqdm import tqdm  # Import tqdm
 # Download necessary data for nltk
 nltk.download('punkt')
-OCR_TR_DESCRIPTION = '''# OCR Translate GeminiPro
-<div id="content_align">OCR translation system based on Tesseract</div>'''
 # Getting the list of available languages for Tesseract
 choices = os.popen('tesseract --list-langs').read().split('\n')[1:-1]
@@ -57,13 +57,13 @@ def cp_text(input_text):
 def cp_clear():
     pyperclip.clear()
-# Split the text into 4000 character chunks
 def process_text_input_text(input_text):
-    # Split the text into 4000 character chunks
-    chunks = [input_text[i:i+4000] for i in range(0, len(input_text), 4000)]
     return chunks
-def process_and_translate(api_key, input_text, inputs_transStyle):
     # Process the input text into chunks
     chunks = process_text_input_text(input_text)
@@ -73,34 +73,56 @@ def process_and_translate(api_key, input_text, inputs_transStyle):
         if chunk is None or chunk == "":
             translated_chunks.append("System prompt: There is no content to translate!")
         else:
-            prompt = f"Display language is {inputs_transStyle}, do not display original text, As a Knowledge Video Content Analysis Expert, specialize in analyzing knowledge videos, identifying and clearly explaining key points in {inputs_transStyle}, ensuring accurate, easy-to-understand summaries suitable for diverse audiences, list key points, and explain detailedly below text: "
             genai.configure(api_key=api_key)
             model = genai.GenerativeModel('gemini-pro')
             response = model.generate_content([prompt, chunk],
                         generation_config=genai.types.GenerationConfig(
                             # Only one candidate for now.
                             candidate_count=1,
-                            stop_sequences=['ʤ'],
-                            max_output_tokens=2048,
-                            temperature=1.0)
                     )
             translated_chunks.append(response.text)
     # Join the translated chunks back together into a single string
-    response = '\n--------------------------------------------------\n'.join(translated_chunks)
     return response
-# Add a translation function
-# def translate(api_key, input_text, inputs_transStyle):
-#     genai.configure(api_key=api_key)
-#     model = genai.GenerativeModel('gemini-pro')
-#     if input_text is None or input_text == "":
-#         return "System prompt: There is no content to translate!"
-#
-#     prompt = f"Via {inputs_transStyle}, As a Knowledge Video Content Analysis Expert, I specialize in analyzing knowledge videos, identifying and clearly explaining key points in English, ensuring accurate, easy-to-understand summaries suitable for diverse audiences."
-#     response = model.generate_content([prompt, input_text])
-#     return response.text
 def main():
@@ -138,11 +160,14 @@ def main():
                 with gr.Column():
                     with gr.Row():
                         outputs_text = gr.Textbox(label="Extract content", lines=20)
-                    inputs_transStyle = gr.inputs.Dropdown(choices=["Chinese (Simplified)", "Chinese (Traditional)", "English", "Japanese", "Korean"],
-                                                           default="Chinese (Traditional)", label='translation mode')
                     with gr.Row():
                         clear_text_btn = gr.Button('Clear')
                         translate_btn = gr.Button(value='Translate', variant="primary")
             with gr.Row():
@@ -152,10 +177,10 @@ def main():
         with gr.Box():
             with gr.Row():
-                gr.Markdown("### Step 02: Translation")
             with gr.Row():
-                outputs_tr_text = gr.Textbox(label="Translate Content", lines=20)
             with gr.Row():
                 cp_clear_btn = gr.Button(value='Clear Clipboard')
@@ -167,7 +192,8 @@ def main():
         clear_img_btn.click(fn=clear_content, inputs=[], outputs=[inputs_img])
         # ---------------------- 翻译 ----------------------
-        translate_btn.click(fn=process_and_translate, inputs=[inputs_api_key, outputs_text, inputs_transStyle], outputs=[outputs_tr_text])
         clear_text_btn.click(fn=clear_content, inputs=[], outputs=[outputs_text])
         # ---------------------- 复制到剪贴板 ----------------------

 # Download necessary data for nltk
 nltk.download('punkt')
+OCR_TR_DESCRIPTION = '''# OCR Translate and Summary GeminiPro
+<div id="content_align">OCR system based on Tesseract</div>'''
 # Getting the list of available languages for Tesseract
 choices = os.popen('tesseract --list-langs').read().split('\n')[1:-1]
 def cp_clear():
     pyperclip.clear()
+# Split the text into 1500 character chunks
 def process_text_input_text(input_text):
+    # Split the text into 1500 character chunks
+    chunks = [input_text[i:i+1500] for i in range(0, len(input_text), 1500)]
     return chunks
+def process_and_translate(api_key, input_text, src_lang, tgt_lang):
     # Process the input text into chunks
     chunks = process_text_input_text(input_text)
         if chunk is None or chunk == "":
             translated_chunks.append("System prompt: There is no content to translate!")
         else:
+            prompt = f"This is an {src_lang} to {tgt_lang} translation, please provide the {tgt_lang} translation for this sentence. Do not provide any explanations or text apart from the translation.\n{src_lang}: "
             genai.configure(api_key=api_key)
             model = genai.GenerativeModel('gemini-pro')
             response = model.generate_content([prompt, chunk],
                         generation_config=genai.types.GenerationConfig(
                             # Only one candidate for now.
                             candidate_count=1,
+                            max_output_tokens=2048,
+                            temperature=0.3,
+                            top_p=1,
+                        )
                     )
             translated_chunks.append(response.text)
     # Join the translated chunks back together into a single string
+    response = ''.join(translated_chunks)
     return response
+def process_and_summary(api_key, input_text, src_lang, tgt_lang):
+    # Process the input text into chunks
+    chunks = process_text_input_text(input_text)
+    # Translate each chunk and collect the results
+    translated_chunks = []
+    for chunk in chunks:
+        if chunk is None or chunk == "":
+            translated_chunks.append("System prompt: There is no content to translate!")
+        else:
+            prompt = f"This is an {src_lang} to {tgt_lang} summarization and knowledge key points, please provide the {tgt_lang} summarization and list the {tgt_lang} knowledge key points for this sentence. Do not provide any explanations or text apart from the summarization.\n{src_lang}: "
+            genai.configure(api_key=api_key)
+            model = genai.GenerativeModel('gemini-pro')
+            response = model.generate_content([prompt, chunk],
+                        generation_config=genai.types.GenerationConfig(
+                            # Only one candidate for now.
+                            candidate_count=1,
+                            max_output_tokens=2048,
+                            temperature=0.3,
+                            top_p=1,
+                        )
+                    )
+            translated_chunks.append(response.text)
+    # Join the translated chunks back together into a single string
+    response = '\n==================================================\n'.join(translated_chunks)
+    return response
+# prompt = f"Display language is {tgt_lang}, do not display original text, As a Knowledge Video Content Analysis Expert, specialize in analyzing knowledge videos, identifying and clearly explaining key points in {tgt_lang}, ensuring accurate, easy-to-understand summaries suitable for diverse audiences, analyze, list key points, and explain detailedly below text: "
 def main():
                 with gr.Column():
                     with gr.Row():
                         outputs_text = gr.Textbox(label="Extract content", lines=20)
+                    src_lang = gr.inputs.Dropdown(choices=["Chinese (Simplified)", "Chinese (Traditional)", "English", "Japanese", "Korean"],
+                                                           default="English", label='source language')
+                    tgt_lang = gr.inputs.Dropdown(choices=["Chinese (Simplified)", "Chinese (Traditional)", "English", "Japanese", "Korean"],
+                                                           default="Chinese (Traditional)", label='target language')
                     with gr.Row():
                         clear_text_btn = gr.Button('Clear')
                         translate_btn = gr.Button(value='Translate', variant="primary")
+                        summary_btn = gr.Button(value='Summary', variant="primary")
             with gr.Row():
         with gr.Box():
             with gr.Row():
+                gr.Markdown("### Step 02: Process")
             with gr.Row():
+                outputs_tr_text = gr.Textbox(label="Process Content", lines=20)
             with gr.Row():
                 cp_clear_btn = gr.Button(value='Clear Clipboard')
         clear_img_btn.click(fn=clear_content, inputs=[], outputs=[inputs_img])
         # ---------------------- 翻译 ----------------------
+        translate_btn.click(fn=process_and_translate, inputs=[inputs_api_key, outputs_text, src_lang, tgt_lang], outputs=[outputs_tr_text])
+        summary_btn.click(fn=process_and_summary, inputs=[inputs_api_key, outputs_text, src_lang, tgt_lang], outputs=[outputs_tr_text])
         clear_text_btn.click(fn=clear_content, inputs=[], outputs=[outputs_text])
         # ---------------------- 复制到剪贴板 ----------------------