import os
import pyperclip
import gradio as gr
import nltk
import pytesseract
import google.generativeai as genai
from nltk.tokenize import sent_tokenize
from transformers import *
import torch
from tqdm import tqdm # Import tqdm
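# Download necessary data for nltk (the 'punkt' tokenizer models).
# NOTE(review): the nltk.download('punkt') call this comment describes appears
# to have been lost from this line - confirm and restore if still needed.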
# Markdown-formatted title and one-line description of the app.
# The literal previously had no closing ''' and therefore swallowed the rest
# of the module; it is now terminated right after the last visible text line.
OCR_TR_DESCRIPTION = '''# OCR Translate and Summary GeminiPro
OCR system based on Tesseract'''
# Languages available to Tesseract, as reported by `tesseract --list-langs`.
# The first output line is skipped (presumably the "List of available
# languages" header - confirm against the installed Tesseract version).
# The pipe is closed deterministically by the context manager rather than
# being left to the garbage collector, and splitlines() keeps the last
# language even when the output does not end with a newline.
with os.popen('tesseract --list-langs') as _langs_pipe:
    choices = _langs_pipe.read().splitlines()[1:]
# Convert a Tesseract language list into a pytesseract language string
def ocr_lang(lang_list):
    """Join language codes into a single ``+``-separated string.

    Args:
        lang_list: Sequence of language codes, e.g. ``["chi_sim", "eng"]``.

    Returns:
        The codes joined with ``+`` (``"chi_sim+eng"``). A single-element
        list yields that element unchanged and an empty list yields ``""``.

    The previous implementation inserted ``"+"`` separators into
    ``lang_list`` itself, so the caller's list was corrupted after every
    call; ``str.join`` produces the same string without touching the input.
    """
    return "+".join(lang_list)
# OCR with Tesseract
def ocr_tesseract(img, languages):
    """Extract text from ``img`` using pytesseract.

    Args:
        img: Image handed straight to ``pytesseract.image_to_string``.
        languages: List of language codes; converted with ``ocr_lang`` into
            the value passed as the ``lang`` argument.

    Returns:
        Whatever ``pytesseract.image_to_string`` returns for the image.
    """
    return pytesseract.image_to_string(img, lang=ocr_lang(languages))
# Clear
def clear_content():
    """Return ``None`` unconditionally.

    Presumably used as a UI callback whose ``None`` result resets the bound
    output component - confirm against the event wiring in ``main``.
    """
    return None
import pyperclip
# Copy to clipboard
def cp_text(input_text):
    """Copy ``input_text`` to the system clipboard via pyperclip.

    The block previously had an ``except`` clause with no ``try`` and no
    copy call at all; both are restored here.

    Args:
        input_text: Text to place on the clipboard.

    Returns:
        None. Failures (for example when no clipboard backend is available)
        are reported on stdout instead of being raised, so a UI callback
        using this function does not crash.
    """
    try:
        pyperclip.copy(input_text)
    except Exception as e:  # best-effort UI action: report, do not propagate
        print("Error occurred while copying to clipboard")
        print(e)
# Clear the clipboard
def cp_clear():
    """Empty the system clipboard by copying an empty string to it.

    The function previously had no body. ``pyperclip.copy("")`` is used
    because ``copy`` is the documented way to set clipboard contents.

    Returns:
        None. As with ``cp_text``, clipboard failures are printed rather
        than raised so that a UI callback does not crash.
    """
    try:
        pyperclip.copy("")
    except Exception as e:  # best-effort UI action: report, do not propagate
        print("Error occurred while clearing the clipboard")
        print(e)
# Split the text into fixed-size character chunks (2000 by default)
def process_text_input_text(input_text, chunk_size=2000):
    """Split ``input_text`` into consecutive chunks of ``chunk_size`` characters.

    Args:
        input_text: Text to split. ``None`` and ``""`` are both treated as
            "no content" (a cleared textbox may deliver ``None``, which used
            to crash on ``len(None)``).
        chunk_size: Maximum number of characters per chunk. Defaults to 2000,
            the value that was previously hard-coded.

    Returns:
        A list of strings that concatenate back to ``input_text``; every
        chunk except possibly the last has exactly ``chunk_size`` characters.
        An empty list is returned when there is no content.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")
    if not input_text:
        return []
    return [
        input_text[start:start + chunk_size]
        for start in range(0, len(input_text), chunk_size)
    ]
def process_and_translate(api_key, input_text, src_lang, tgt_lang):
# Translate input_text from src_lang to tgt_lang with the 'gemini-pro' model,
# one chunk at a time, and return the per-chunk results joined by blank lines.
#
# NOTE(review): this block has lost its indentation and several lines (see
# the notes below); it is not runnable as shown.
# NOTE(review): api_key is never referenced in the visible lines; presumably
# a genai.configure(api_key=api_key) call is missing - confirm.
# Process the input text into chunks
chunks = process_text_input_text(input_text)
# Translate each chunk and collect the results
translated_chunks = []
for chunk in chunks:
# NOTE(review): process_text_input_text builds chunks from
# range(0, len(input_text), 2000), so it never yields an empty or None chunk;
# for empty input the loop body does not run at all and this message is never
# produced. With indentation lost it is also unclear whether the code below
# was in an else branch.
if chunk is None or chunk == "":
translated_chunks.append("System prompt: There is no content to translate!")
# Instruction text sent ahead of the chunk; the chunk itself is passed as a
# second item in the generate_content call below.
prompt = f"This is an {src_lang} to {tgt_lang} translation, please provide the {tgt_lang} translation for this paragraph. Do not provide any explanations or text apart from the translation.\n{src_lang}: "
#prompt = f"This is an {src_lang} to {tgt_lang} translation, please provide the {tgt_lang} translation for this sentence. Do not provide any explanations or text apart from the translation.\n{src_lang}: "
# NOTE(review): the closing "}" of this dict is missing. The keys are
# camelCase; the Python SDK documents snake_case names (candidate_count,
# max_output_tokens, top_p) - confirm these are accepted.
generation_config = {
"candidateCount": 1,
"maxOutputTokens": 2048,
"temperature": 0.3,
"topP": 1
# NOTE(review): only the "threshold" lines of four entries remain; the
# enclosing braces, the "category" keys and the closing "]" are missing.
safety_settings = [
"threshold": "BLOCK_NONE",
"threshold": "BLOCK_NONE",
"threshold": "BLOCK_NONE",
"threshold": "BLOCK_NONE",
model = genai.GenerativeModel(model_name='gemini-pro')
# NOTE(review): the call below is not closed, and no visible line passes
# generation_config / safety_settings to it or appends the model output to
# translated_chunks - confirm against the original file.
response = model.generate_content([prompt, chunk],
# Join the translated chunks back together into a single string
response = '\n\n'.join(translated_chunks)
return response
def process_and_summary(api_key, input_text, src_lang, tgt_lang):
# Summarize input_text (and list its key points) in tgt_lang with the
# 'gemini-pro' model, one chunk at a time, and return the per-chunk results
# joined by a "*Next Paragraph*" separator.
#
# NOTE(review): this block has lost its indentation and several lines (see
# the notes below); it is not runnable as shown.
# NOTE(review): api_key is never referenced in the visible lines; presumably
# a genai.configure(api_key=api_key) call is missing - confirm.
# Process the input text into chunks
chunks = process_text_input_text(input_text)
# Summarize each chunk and collect the results (the list keeps the
# "translated_chunks" name used by process_and_translate)
translated_chunks = []
for chunk in chunks:
# NOTE(review): process_text_input_text builds chunks from
# range(0, len(input_text), 2000), so it never yields an empty or None chunk;
# for empty input the loop body does not run at all and this message is never
# produced. With indentation lost it is also unclear whether the code below
# was in an else branch.
if chunk is None or chunk == "":
translated_chunks.append("System prompt: There is no content to translate!")
# Instruction text sent ahead of the chunk; the chunk itself is passed as a
# second item in the generate_content call below.
prompt = f"This is an {src_lang} to {tgt_lang} summarization and knowledge key points, please provide the {tgt_lang} summarization and list the {tgt_lang} knowledge key points for this sentence. Do not provide any explanations or text apart from the summarization.\n{src_lang}: "
# NOTE(review): the closing "}" of this dict is missing. The keys are
# camelCase; the Python SDK documents snake_case names (candidate_count,
# max_output_tokens, top_p) - confirm these are accepted.
generation_config = {
"candidateCount": 1,
"maxOutputTokens": 2048,
"temperature": 0.3,
"topP": 1
# NOTE(review): only the "threshold" lines of four entries remain; the
# enclosing braces, the "category" keys and the closing "]" are missing.
safety_settings = [
"threshold": "BLOCK_NONE",
"threshold": "BLOCK_NONE",
"threshold": "BLOCK_NONE",
"threshold": "BLOCK_NONE",
model = genai.GenerativeModel(model_name='gemini-pro')
# NOTE(review): the call below is not closed, and no visible line passes
# generation_config / safety_settings to it or appends the model output to
# translated_chunks - confirm against the original file.
response = model.generate_content([prompt, chunk],
# Join the per-chunk summaries back together into a single string
response = '\n\n*Next Paragraph*\n\n'.join(translated_chunks)
return response
# prompt = f"Display language is {tgt_lang}, do not display original text, As a Knowledge Video Content Analysis Expert, specialize in analyzing knowledge videos, identifying and clearly explaining key points in {tgt_lang}, ensuring accurate, easy-to-understand summaries suitable for diverse audiences, analyze, list key points, and explain detailedly below text: "
def main():
# Build the Gradio Blocks UI: "Step 01" holds the image upload, OCR language
# checkboxes, OCR/Clear buttons, API-key box, extracted-text box, source and
# target language dropdowns and Translate/Summary buttons; "Step 02" holds
# the processed-text box and the clipboard buttons.
#
# NOTE(review): indentation has been lost, so the nesting of the with-blocks
# below cannot be read from this view, and several lines are truncated or
# missing (see the notes below). No launch call is visible.
# NOTE(review): gr.Box, gr.inputs.Dropdown and gr.Image(source=...) look like
# legacy Gradio APIs - confirm the Gradio version this file is pinned to.
with gr.Blocks(css='style.css') as ocr_tr:
# -------------- OCR text extraction --------------
with gr.Box():
with gr.Row():
gr.Markdown("### Step 01: Text Extraction")
with gr.Row():
with gr.Column():
with gr.Row():
inputs_img = gr.Image(image_mode="RGB", source="upload", type="pil", label="image")
with gr.Row():
# NOTE(review): this call is unterminated - its remaining arguments and the
# closing ")" are missing.
inputs_lang = gr.CheckboxGroup(choices=["chi_sim", "eng"],
with gr.Row():
clear_img_btn = gr.Button('Clear')
ocr_btn = gr.Button(value='OCR Extraction', variant="primary")
with gr.Row():
# Use Markdown to display clickable URL
# NOTE(review): the link target inside "(" is missing from the string below.
gr.Markdown("[Click here to get API key](")
with gr.Row():
# Create a text input box for users to enter their API key
inputs_api_key = gr.Textbox(label="Please enter your API key here", type="password")
with gr.Column():
with gr.Row():
outputs_text = gr.Textbox(label="Extract content", lines=20)
src_lang = gr.inputs.Dropdown(choices=["Chinese (Simplified)", "Chinese (Traditional)", "English", "Japanese", "Korean"],
default="English", label='source language')
tgt_lang = gr.inputs.Dropdown(choices=["Chinese (Simplified)", "Chinese (Traditional)", "English", "Japanese", "Korean"],
default="Chinese (Traditional)", label='target language')
with gr.Row():
clear_text_btn = gr.Button('Clear')
translate_btn = gr.Button(value='Translate', variant="primary")
summary_btn = gr.Button(value='Summary', variant="primary")
# NOTE(review): nothing is visible inside the Row opened on the next line.
with gr.Row():
# -------------- Translation --------------
with gr.Box():
with gr.Row():
gr.Markdown("### Step 02: Process")
with gr.Row():
outputs_tr_text = gr.Textbox(label="Process Content", lines=20)
with gr.Row():
cp_clear_btn = gr.Button(value='Clear Clipboard')
cp_btn = gr.Button(value='Copy to clipboard', variant="primary")
# NOTE(review): the event wiring below has been fused into the section
# comments; only the inputs=/outputs= argument fragments survive, and the
# button/callback names they belonged to are not visible - restore from the
# original file.
# ---------------------- OCR Tesseract ----------------------, inputs=[inputs_img, inputs_lang], outputs=[
outputs_text,]), inputs=[], outputs=[inputs_img])
# ---------------------- Translation ----------------------, inputs=[inputs_api_key, outputs_text, src_lang, tgt_lang], outputs=[outputs_tr_text]), inputs=[inputs_api_key, outputs_text, src_lang, tgt_lang], outputs=[outputs_tr_text]), inputs=[], outputs=[outputs_text])
# ---------------------- Copy to clipboard ----------------------, inputs=[outputs_tr_text], outputs=[]), inputs=[], outputs=[])
if __name__ == '__main__':