SoybeanMilk committed on
Commit
5f1dbf2
1 Parent(s): 883ec42

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -26
app.py CHANGED
@@ -12,8 +12,8 @@ from tqdm import tqdm # Import tqdm
12
  # Download necessary data for nltk
13
  nltk.download('punkt')
14
 
15
- OCR_TR_DESCRIPTION = '''# OCR Translate GeminiPro
16
- <div id="content_align">OCR translation system based on Tesseract</div>'''
17
 
18
  # Getting the list of available languages for Tesseract
19
  choices = os.popen('tesseract --list-langs').read().split('\n')[1:-1]
@@ -57,13 +57,13 @@ def cp_text(input_text):
57
  def cp_clear():
58
  pyperclip.clear()
59
 
60
- # Split the text into 4000 character chunks
61
  def process_text_input_text(input_text):
62
- # Split the text into 4000 character chunks
63
- chunks = [input_text[i:i+4000] for i in range(0, len(input_text), 4000)]
64
  return chunks
65
 
66
- def process_and_translate(api_key, input_text, inputs_transStyle):
67
  # Process the input text into chunks
68
  chunks = process_text_input_text(input_text)
69
 
@@ -73,34 +73,56 @@ def process_and_translate(api_key, input_text, inputs_transStyle):
73
  if chunk is None or chunk == "":
74
  translated_chunks.append("System prompt: There is no content to translate!")
75
  else:
76
- prompt = f"Display language is {inputs_transStyle}, do not display original text, As a Knowledge Video Content Analysis Expert, specialize in analyzing knowledge videos, identifying and clearly explaining key points in {inputs_transStyle}, ensuring accurate, easy-to-understand summaries suitable for diverse audiences, list key points, and explain detailedly below text: "
77
  genai.configure(api_key=api_key)
78
  model = genai.GenerativeModel('gemini-pro')
79
  response = model.generate_content([prompt, chunk],
80
  generation_config=genai.types.GenerationConfig(
81
  # Only one candidate for now.
82
  candidate_count=1,
83
- stop_sequences=['ʤ'],
84
- max_output_tokens=2048,
85
- temperature=1.0)
 
86
  )
87
  translated_chunks.append(response.text)
88
 
89
  # Join the translated chunks back together into a single string
90
- response = '\n--------------------------------------------------\n'.join(translated_chunks)
91
 
92
  return response
93
 
94
- # Add a translation function
95
- # def translate(api_key, input_text, inputs_transStyle):
96
- # genai.configure(api_key=api_key)
97
- # model = genai.GenerativeModel('gemini-pro')
98
- # if input_text is None or input_text == "":
99
- # return "System prompt: There is no content to translate!"
100
- #
101
- # prompt = f"Via {inputs_transStyle}, As a Knowledge Video Content Analysis Expert, I specialize in analyzing knowledge videos, identifying and clearly explaining key points in English, ensuring accurate, easy-to-understand summaries suitable for diverse audiences."
102
- # response = model.generate_content([prompt, input_text])
103
- # return response.text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
 
105
  def main():
106
 
@@ -138,11 +160,14 @@ def main():
138
  with gr.Column():
139
  with gr.Row():
140
  outputs_text = gr.Textbox(label="Extract content", lines=20)
141
- inputs_transStyle = gr.inputs.Dropdown(choices=["Chinese (Simplified)", "Chinese (Traditional)", "English", "Japanese", "Korean"],
142
- default="Chinese (Traditional)", label='translation mode')
 
 
143
  with gr.Row():
144
  clear_text_btn = gr.Button('Clear')
145
  translate_btn = gr.Button(value='Translate', variant="primary")
 
146
 
147
 
148
  with gr.Row():
@@ -152,10 +177,10 @@ def main():
152
  with gr.Box():
153
 
154
  with gr.Row():
155
- gr.Markdown("### Step 02: Translation")
156
 
157
  with gr.Row():
158
- outputs_tr_text = gr.Textbox(label="Translate Content", lines=20)
159
 
160
  with gr.Row():
161
  cp_clear_btn = gr.Button(value='Clear Clipboard')
@@ -167,7 +192,8 @@ def main():
167
  clear_img_btn.click(fn=clear_content, inputs=[], outputs=[inputs_img])
168
 
169
  # ---------------------- 翻译 ----------------------
170
- translate_btn.click(fn=process_and_translate, inputs=[inputs_api_key, outputs_text, inputs_transStyle], outputs=[outputs_tr_text])
 
171
  clear_text_btn.click(fn=clear_content, inputs=[], outputs=[outputs_text])
172
 
173
  # ---------------------- 复制到剪贴板 ----------------------
 
12
  # Download necessary data for nltk
13
  nltk.download('punkt')
14
 
15
+ OCR_TR_DESCRIPTION = '''# OCR Translate and Summary GeminiPro
16
+ <div id="content_align">OCR system based on Tesseract</div>'''
17
 
18
  # Getting the list of available languages for Tesseract
19
  choices = os.popen('tesseract --list-langs').read().split('\n')[1:-1]
 
57
  def cp_clear():
58
  pyperclip.clear()
59
 
60
+ # Split the text into 1500 character chunks
61
  def process_text_input_text(input_text):
62
+ # Split the text into 1500 character chunks
63
+ chunks = [input_text[i:i+1500] for i in range(0, len(input_text), 1500)]
64
  return chunks
65
 
66
+ def process_and_translate(api_key, input_text, src_lang, tgt_lang):
67
  # Process the input text into chunks
68
  chunks = process_text_input_text(input_text)
69
 
 
73
  if chunk is None or chunk == "":
74
  translated_chunks.append("System prompt: There is no content to translate!")
75
  else:
76
+ prompt = f"This is an {src_lang} to {tgt_lang} translation, please provide the {tgt_lang} translation for this sentence. Do not provide any explanations or text apart from the translation.\n{src_lang}: "
77
  genai.configure(api_key=api_key)
78
  model = genai.GenerativeModel('gemini-pro')
79
  response = model.generate_content([prompt, chunk],
80
  generation_config=genai.types.GenerationConfig(
81
  # Only one candidate for now.
82
  candidate_count=1,
83
+ max_output_tokens=2048,
84
+ temperature=0.3,
85
+ top_p=1,
86
+ )
87
  )
88
  translated_chunks.append(response.text)
89
 
90
  # Join the translated chunks back together into a single string
91
+ response = ''.join(translated_chunks)
92
 
93
  return response
94
 
95
+ def process_and_summary(api_key, input_text, src_lang, tgt_lang):
96
+ # Process the input text into chunks
97
+ chunks = process_text_input_text(input_text)
98
+
99
+ # Translate each chunk and collect the results
100
+ translated_chunks = []
101
+ for chunk in chunks:
102
+ if chunk is None or chunk == "":
103
+ translated_chunks.append("System prompt: There is no content to translate!")
104
+ else:
105
+ prompt = f"This is an {src_lang} to {tgt_lang} summarization and knowledge key points, please provide the {tgt_lang} summarization and list the {tgt_lang} knowledge key points for this sentence. Do not provide any explanations or text apart from the summarization.\n{src_lang}: "
106
+ genai.configure(api_key=api_key)
107
+ model = genai.GenerativeModel('gemini-pro')
108
+ response = model.generate_content([prompt, chunk],
109
+ generation_config=genai.types.GenerationConfig(
110
+ # Only one candidate for now.
111
+ candidate_count=1,
112
+ max_output_tokens=2048,
113
+ temperature=0.3,
114
+ top_p=1,
115
+ )
116
+ )
117
+ translated_chunks.append(response.text)
118
+
119
+ # Join the translated chunks back together into a single string
120
+ response = '\n==================================================\n'.join(translated_chunks)
121
+
122
+ return response
123
+
124
+ # prompt = f"Display language is {tgt_lang}, do not display original text, As a Knowledge Video Content Analysis Expert, specialize in analyzing knowledge videos, identifying and clearly explaining key points in {tgt_lang}, ensuring accurate, easy-to-understand summaries suitable for diverse audiences, analyze, list key points, and explain detailedly below text: "
125
+
126
 
127
  def main():
128
 
 
160
  with gr.Column():
161
  with gr.Row():
162
  outputs_text = gr.Textbox(label="Extract content", lines=20)
163
+ src_lang = gr.inputs.Dropdown(choices=["Chinese (Simplified)", "Chinese (Traditional)", "English", "Japanese", "Korean"],
164
+ default="English", label='source language')
165
+ tgt_lang = gr.inputs.Dropdown(choices=["Chinese (Simplified)", "Chinese (Traditional)", "English", "Japanese", "Korean"],
166
+ default="Chinese (Traditional)", label='target language')
167
  with gr.Row():
168
  clear_text_btn = gr.Button('Clear')
169
  translate_btn = gr.Button(value='Translate', variant="primary")
170
+ summary_btn = gr.Button(value='Summary', variant="primary")
171
 
172
 
173
  with gr.Row():
 
177
  with gr.Box():
178
 
179
  with gr.Row():
180
+ gr.Markdown("### Step 02: Process")
181
 
182
  with gr.Row():
183
+ outputs_tr_text = gr.Textbox(label="Process Content", lines=20)
184
 
185
  with gr.Row():
186
  cp_clear_btn = gr.Button(value='Clear Clipboard')
 
192
  clear_img_btn.click(fn=clear_content, inputs=[], outputs=[inputs_img])
193
 
194
  # ---------------------- 翻译 ----------------------
195
+ translate_btn.click(fn=process_and_translate, inputs=[inputs_api_key, outputs_text, src_lang, tgt_lang], outputs=[outputs_tr_text])
196
+ summary_btn.click(fn=process_and_summary, inputs=[inputs_api_key, outputs_text, src_lang, tgt_lang], outputs=[outputs_tr_text])
197
  clear_text_btn.click(fn=clear_content, inputs=[], outputs=[outputs_text])
198
 
199
  # ---------------------- 复制到剪贴板 ----------------------