Spaces:

SoybeanMilk
/

OCR-Translate-and-Summary-GeminiPro

Sleeping

App Files Community

SoybeanMilk committed on Jan 17, 2024

Commit

aa0d25a

•

1 Parent(s): 5f1dbf2

Update app.py

Browse files

Files changed (1) hide show

app.py +69 -22

app.py CHANGED Viewed

@@ -57,10 +57,10 @@ def cp_text(input_text):
 def cp_clear():
     pyperclip.clear()
-# Split the text into 1500 character chunks
 def process_text_input_text(input_text):
-    # Split the text into 1500 character chunks
-    chunks = [input_text[i:i+1500] for i in range(0, len(input_text), 1500)]
     return chunks
 def process_and_translate(api_key, input_text, src_lang, tgt_lang):
@@ -73,22 +73,46 @@ def process_and_translate(api_key, input_text, src_lang, tgt_lang):
         if chunk is None or chunk == "":
             translated_chunks.append("System prompt: There is no content to translate!")
         else:
-            prompt = f"This is an {src_lang} to {tgt_lang} translation, please provide the {tgt_lang} translation for this sentence. Do not provide any explanations or text apart from the translation.\n{src_lang}: "
             genai.configure(api_key=api_key)
-            model = genai.GenerativeModel('gemini-pro')
             response = model.generate_content([prompt, chunk],
-                        generation_config=genai.types.GenerationConfig(
-                            # Only one candidate for now.
-                            candidate_count=1,
-                            max_output_tokens=2048,
-                            temperature=0.3,
-                            top_p=1,
-                        )
                     )
             translated_chunks.append(response.text)
     # Join the translated chunks back together into a single string
-    response = ''.join(translated_chunks)
     return response
@@ -104,20 +128,42 @@ def process_and_summary(api_key, input_text, src_lang, tgt_lang):
         else:
             prompt = f"This is an {src_lang} to {tgt_lang} summarization and knowledge key points, please provide the {tgt_lang} summarization and list the {tgt_lang} knowledge key points for this sentence. Do not provide any explanations or text apart from the summarization.\n{src_lang}: "
             genai.configure(api_key=api_key)
-            model = genai.GenerativeModel('gemini-pro')
             response = model.generate_content([prompt, chunk],
-                        generation_config=genai.types.GenerationConfig(
-                            # Only one candidate for now.
-                            candidate_count=1,
-                            max_output_tokens=2048,
-                            temperature=0.3,
-                            top_p=1,
-                        )
                     )
             translated_chunks.append(response.text)
     # Join the translated chunks back together into a single string
-    response = '\n==================================================\n'.join(translated_chunks)
     return response
@@ -199,6 +245,7 @@ def main():
         # ---------------------- 复制到剪贴板 ----------------------
         cp_btn.click(fn=cp_text, inputs=[outputs_tr_text], outputs=[])
         cp_clear_btn.click(fn=cp_clear, inputs=[], outputs=[])
     ocr_tr.launch(inbrowser=True)

 def cp_clear():
     pyperclip.clear()
+# Split the text into 2000 character chunks
 def process_text_input_text(input_text):
+    # Split the text into 2000 character chunks
+    chunks = [input_text[i:i+2000] for i in range(0, len(input_text), 2000)]
     return chunks
 def process_and_translate(api_key, input_text, src_lang, tgt_lang):
         if chunk is None or chunk == "":
             translated_chunks.append("System prompt: There is no content to translate!")
         else:
+            prompt = f"This is an {src_lang} to {tgt_lang} translation, please provide the {tgt_lang} translation for this paragraph. Do not provide any explanations or text apart from the translation.\n{src_lang}: "
+            #prompt = f"This is an {src_lang} to {tgt_lang} translation, please provide the {tgt_lang} translation for this sentence. Do not provide any explanations or text apart from the translation.\n{src_lang}: "
             genai.configure(api_key=api_key)
+            generation_config = {
+                                    "candidateCount": 1,
+                                    "maxOutputTokens": 2048,
+                                    "temperature": 0.3,
+                                    "topP": 1
+            }
+            safety_settings = [
+                    {
+                        "category": "HARM_CATEGORY_HARASSMENT",
+                        "threshold": "BLOCK_NONE",
+                    },
+                    {
+                        "category": "HARM_CATEGORY_HATE_SPEECH",
+                        "threshold": "BLOCK_NONE",
+                    },
+                    {
+                        "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
+                        "threshold": "BLOCK_NONE",
+                    },
+                    {
+                        "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
+                        "threshold": "BLOCK_NONE",
+                    },
+            ]
+            model = genai.GenerativeModel(model_name='gemini-pro')
             response = model.generate_content([prompt, chunk],
+                        #generation_config=generation_config,
+                        safety_settings=safety_settings
                     )
             translated_chunks.append(response.text)
     # Join the translated chunks back together into a single string
+    response = '\n\n'.join(translated_chunks)
     return response
         else:
             prompt = f"This is an {src_lang} to {tgt_lang} summarization and knowledge key points, please provide the {tgt_lang} summarization and list the {tgt_lang} knowledge key points for this sentence. Do not provide any explanations or text apart from the summarization.\n{src_lang}: "
             genai.configure(api_key=api_key)
+            generation_config = {
+                                    "candidateCount": 1,
+                                    "maxOutputTokens": 2048,
+                                    "temperature": 0.3,
+                                    "topP": 1
+            }
+            safety_settings = [
+                    {
+                        "category": "HARM_CATEGORY_HARASSMENT",
+                        "threshold": "BLOCK_NONE",
+                    },
+                    {
+                        "category": "HARM_CATEGORY_HATE_SPEECH",
+                        "threshold": "BLOCK_NONE",
+                    },
+                    {
+                        "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
+                        "threshold": "BLOCK_NONE",
+                    },
+                    {
+                        "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
+                        "threshold": "BLOCK_NONE",
+                    },
+            ]
+            model = genai.GenerativeModel(model_name='gemini-pro')
             response = model.generate_content([prompt, chunk],
+                        #generation_config=generation_config,
+                        safety_settings=safety_settings
                     )
             translated_chunks.append(response.text)
     # Join the translated chunks back together into a single string
+    response = '\n\n*Next Paragraph*\n\n'.join(translated_chunks)
     return response
         # ---------------------- 复制到剪贴板 ----------------------
         cp_btn.click(fn=cp_text, inputs=[outputs_tr_text], outputs=[])
         cp_clear_btn.click(fn=cp_clear, inputs=[], outputs=[])
     ocr_tr.launch(inbrowser=True)