import os
import pyperclip
import gradio as gr
import nltk
import pytesseract
import google.generativeai as genai
from nltk.tokenize import sent_tokenize
from transformers import *
import torch
from tqdm import tqdm # Import tqdm
import time
# Download the 'punkt' data for nltk (this runs every time the module is imported)
nltk.download('punkt')
# Markdown text shown at the top of the page (passed to gr.Markdown in main)
OCR_TR_DESCRIPTION = '''# OCR Translate and Summary GeminiPro
OCR system based on Tesseract
'''
# Getting the list of available languages for Tesseract by running the CLI.
# The first and last entries of the newline-split output are discarded
# (presumably the header line and the empty string after the final newline).
# NOTE(review): `choices` is not referenced anywhere else in the visible code.
choices = os.popen('tesseract --list-langs').read().split('\n')[1:-1]
# Convert a list of Tesseract language codes into a pytesseract language string
def ocr_lang(lang_list):
    """Join Tesseract language codes into a single ``+``-separated string.

    Args:
        lang_list: Sequence of language codes, e.g. ``["chi_sim", "eng"]``.

    Returns:
        The codes joined with ``+`` (``"chi_sim+eng"``). A single code is
        returned as-is and an empty sequence yields ``""``.
    """
    # str.join handles the empty, single and multi-language cases alike and,
    # unlike inserting separators into the list, leaves lang_list unmodified.
    return "+".join(lang_list)
# OCR with Tesseract
def ocr_tesseract(img, languages):
    """Extract text from an image with Tesseract.

    Args:
        img: Image to recognise, passed straight to
            ``pytesseract.image_to_string``; ``None`` means no image was given.
        languages: List of Tesseract language codes selected by the user.

    Returns:
        The recognised text, or ``""`` when ``img`` is ``None``.
    """
    # Nothing to recognise: return an empty result instead of letting
    # pytesseract fail on a missing image.
    if img is None:
        return ""
    # Hand ocr_lang a copy so the caller's selection list is never modified.
    lang_spec = ocr_lang(list(languages or []))
    # An empty language string is not a valid "-l" value; None lets
    # pytesseract fall back to its default language.
    return pytesseract.image_to_string(img, lang=lang_spec or None)
# Clear
def clear_content() -> None:
    """Return ``None``; used as a click handler whose output receives that value."""
    return None
import pyperclip
# Copy to clipboard
def cp_text(input_text):
    """Copy ``input_text`` to the clipboard via pyperclip (best effort).

    Any exception raised while copying is reported on stdout and swallowed,
    so the function always returns ``None``.
    """
    try:
        pyperclip.copy(input_text)
    except Exception as err:
        # Same two output lines as separate prints: the notice, then the error.
        print("Error occurred while copying to clipboard", err, sep="\n")
# Clear the clipboard
def cp_clear():
    """Empty the clipboard (best effort).

    Any exception raised by the clipboard backend is reported on stdout and
    swallowed, mirroring the error handling of ``cp_text``.
    """
    try:
        # pyperclip offers copy()/paste() but no clear(); copying an empty
        # string is the way to blank the clipboard.
        pyperclip.copy("")
    except Exception as e:
        print("Error occurred while clearing clipboard")
        print(e)
# Split the text into fixed-size character chunks (2000 characters by default)
def process_text_input_text(input_text, chunk_size=2000):
    """Split ``input_text`` into consecutive chunks of ``chunk_size`` characters.

    Args:
        input_text: Text to split; ``None`` or ``""`` produces no chunks.
        chunk_size: Maximum number of characters per chunk (default 2000).

    Returns:
        A list of substrings in original order. Every chunk except possibly
        the last has exactly ``chunk_size`` characters.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not input_text:
        return []
    return [
        input_text[start:start + chunk_size]
        for start in range(0, len(input_text), chunk_size)
    ]
def process_and_translate(api_key, input_text, src_lang, tgt_lang):
    """Translate text from ``src_lang`` to ``tgt_lang`` with the Gemini Pro model.

    The text is split into chunks by ``process_text_input_text`` and each
    chunk is sent to the model as a separate request.

    Args:
        api_key: Google Generative AI API key.
        input_text: Text to translate.
        src_lang: Name of the source language, inserted into the prompt.
        tgt_lang: Name of the target language, inserted into the prompt.

    Returns:
        The translated chunks joined by blank lines, or a notice string when
        ``input_text`` is ``None``, empty or whitespace-only.
    """
    # Empty input yields zero chunks, so the notice must be produced here;
    # a check inside the per-chunk loop would never run.
    if input_text is None or not input_text.strip():
        return "System prompt: There is no content to translate!"

    prompt = f"This is an {src_lang} to {tgt_lang} translation, please provide the {tgt_lang} translation for this paragraph. Do not provide any explanations or text apart from the translation.\n{src_lang}: "

    # Client configuration, safety settings and the model do not depend on
    # the chunk, so they are built once instead of once per request.
    genai.configure(api_key=api_key)
    safety_settings = [
        {"category": category, "threshold": "BLOCK_NONE"}
        for category in (
            "HARM_CATEGORY_HARASSMENT",
            "HARM_CATEGORY_HATE_SPEECH",
            "HARM_CATEGORY_SEXUALLY_EXPLICIT",
            "HARM_CATEGORY_DANGEROUS_CONTENT",
        )
    ]
    model = genai.GenerativeModel(model_name='gemini-pro')

    # Translate each chunk and collect the results.
    translated_chunks = []
    for chunk in process_text_input_text(input_text):
        response = model.generate_content([prompt, chunk],
                                          safety_settings=safety_settings)
        translated_chunks.append(response.text)

    # Join the translated chunks back together into a single string.
    return '\n\n'.join(translated_chunks)
def process_and_summary(api_key, input_text, src_lang, tgt_lang):
    """Summarise text and list its key points in ``tgt_lang`` with Gemini Pro.

    The text is split into chunks by ``process_text_input_text`` and each
    chunk is sent to the model as a separate request.

    Args:
        api_key: Google Generative AI API key.
        input_text: Text to summarise.
        src_lang: Name of the source language, inserted into the prompt.
        tgt_lang: Name of the output language, inserted into the prompt.

    Returns:
        The per-chunk summaries joined by a ``*Next Paragraph*`` separator,
        or a notice string when ``input_text`` is ``None``, empty or
        whitespace-only.
    """
    # Empty input yields zero chunks, so the notice must be produced here
    # (and before the delay below, so the user is not kept waiting for it).
    if input_text is None or not input_text.strip():
        return "System prompt: There is no content to summarize!"

    # NOTE(review): fixed 30-second pause kept from the original code,
    # presumably to stay under an API rate limit; confirm it is still needed.
    time.sleep(30)

    prompt = f"This is an {src_lang} to {tgt_lang} summarization and knowledge key points, please provide the {tgt_lang} summarization and list the {tgt_lang} knowledge key points for this sentence. Do not provide any explanations or text apart from the summarization.\n{src_lang}: "

    # Client configuration, safety settings and the model do not depend on
    # the chunk, so they are built once instead of once per request.
    genai.configure(api_key=api_key)
    safety_settings = [
        {"category": category, "threshold": "BLOCK_NONE"}
        for category in (
            "HARM_CATEGORY_HARASSMENT",
            "HARM_CATEGORY_HATE_SPEECH",
            "HARM_CATEGORY_SEXUALLY_EXPLICIT",
            "HARM_CATEGORY_DANGEROUS_CONTENT",
        )
    ]
    model = genai.GenerativeModel(model_name='gemini-pro')

    # Summarise each chunk and collect the results.
    summarized_chunks = []
    for chunk in process_text_input_text(input_text):
        response = model.generate_content([prompt, chunk],
                                          safety_settings=safety_settings)
        summarized_chunks.append(response.text)

    # Join the per-chunk summaries back together into a single string.
    return '\n\n*Next Paragraph*\n\n'.join(summarized_chunks)
# prompt = f"Display language is {tgt_lang}, do not display original text, As a Knowledge Video Content Analysis Expert, specialize in analyzing knowledge videos, identifying and clearly explaining key points in {tgt_lang}, ensuring accurate, easy-to-understand summaries suitable for diverse audiences, analyze, list key points, and explain detailedly below text: "
# Build the Gradio Blocks UI, wire the button callbacks and launch the app.
def main():
with gr.Blocks(css='style.css') as ocr_tr:
gr.Markdown(OCR_TR_DESCRIPTION)
# -------------- OCR text extraction --------------
# NOTE(review): gr.Box, gr.Image(source=...) and gr.inputs.Dropdown(default=...)
# look like legacy Gradio APIs; confirm the pinned Gradio version provides them.
with gr.Box():
with gr.Row():
gr.Markdown("### Step 01: Text Extraction")
with gr.Row():
with gr.Column():
with gr.Row():
inputs_img = gr.Image(image_mode="RGB", source="upload", type="pil", label="image")
with gr.Row():
# OCR languages offered to the user are hard-coded here; the module-level
# `choices` list is not used.
inputs_lang = gr.CheckboxGroup(choices=["chi_sim", "eng"],
type="value",
value=['eng'],
label='language')
with gr.Row():
clear_img_btn = gr.Button('Clear')
ocr_btn = gr.Button(value='OCR Extraction', variant="primary")
with gr.Row():
# Use Markdown to display clickable URL
gr.Markdown("[Click here to get API key](https://makersuite.google.com/u/1/app/apikey)")
with gr.Row():
# Create a text input box for users to enter their API key
inputs_api_key = gr.Textbox(label="Please enter your API key here", type="password")
with gr.Column():
with gr.Row():
# OCR result; also the input text for the Translate and Summary actions
outputs_text = gr.Textbox(label="Extract content", lines=20)
src_lang = gr.inputs.Dropdown(choices=["Chinese (Simplified)", "Chinese (Traditional)", "English", "Japanese", "Korean"],
default="English", label='source language')
tgt_lang = gr.inputs.Dropdown(choices=["Chinese (Simplified)", "Chinese (Traditional)", "English", "Japanese", "Korean"],
default="Chinese (Traditional)", label='target language')
with gr.Row():
clear_text_btn = gr.Button('Clear')
translate_btn = gr.Button(value='Translate', variant="primary")
summary_btn = gr.Button(value='Summary', variant="primary")
with gr.Row():
pass
# -------------- Translation --------------
with gr.Box():
with gr.Row():
gr.Markdown("### Step 02: Process")
with gr.Row():
# Receives the output of both the Translate and the Summary actions
outputs_tr_text = gr.Textbox(label="Process Content", lines=20)
with gr.Row():
cp_clear_btn = gr.Button(value='Clear Clipboard')
cp_btn = gr.Button(value='Copy to clipboard', variant="primary")
# ---------------------- OCR Tesseract ----------------------
ocr_btn.click(fn=ocr_tesseract, inputs=[inputs_img, inputs_lang], outputs=[
outputs_text,])
# clear_content returns None, which is sent to the image component
clear_img_btn.click(fn=clear_content, inputs=[], outputs=[inputs_img])
# ---------------------- Translation ----------------------
translate_btn.click(fn=process_and_translate, inputs=[inputs_api_key, outputs_text, src_lang, tgt_lang], outputs=[outputs_tr_text])
summary_btn.click(fn=process_and_summary, inputs=[inputs_api_key, outputs_text, src_lang, tgt_lang], outputs=[outputs_tr_text])
clear_text_btn.click(fn=clear_content, inputs=[], outputs=[outputs_text])
# ---------------------- Copy to clipboard ----------------------
# NOTE(review): these handlers call pyperclip in this Python process, so they
# presumably act on the clipboard of the machine running the app; confirm.
cp_btn.click(fn=cp_text, inputs=[outputs_tr_text], outputs=[])
cp_clear_btn.click(fn=cp_clear, inputs=[], outputs=[])
# Start the app; inbrowser=True asks Gradio to open it in a browser on launch
ocr_tr.launch(inbrowser=True)
# Build and launch the UI only when this file is run as a script
if __name__ == '__main__':
main()