# Hugging Face Space: SoybeanMilk/OCR-Translate-and-Summary-GeminiPro
# (Space status when captured: Sleeping; file size: 5,158 bytes)

import os
import pyperclip
import gradio as gr
import nltk
import pytesseract
import google.generativeai as genai
from nltk.tokenize import sent_tokenize
from transformers import *
import torch
from tqdm import tqdm  # Import tqdm

# Download the tokenizer data nltk needs (used by sent_tokenize)
nltk.download('punkt')

# Markdown shown at the top of the UI
OCR_TR_DESCRIPTION = '''# OCR Translate v0.2
<div id="content_align">OCR translation system based on Tesseract</div>'''

# Get the list of languages installed for Tesseract. The first output line
# (a header) and the empty string after the final newline are dropped.
# The pipe is opened in a `with` block so it is closed deterministically.
with os.popen('tesseract --list-langs') as _tesseract_langs:
    choices = _tesseract_langs.read().split('\n')[1:-1]

# Configure the Gemini client. The API key is read from the GOOGLE_API_KEY
# environment variable (e.g. a Space secret) rather than being committed to
# the source file. Any key that was previously published in this file should
# be revoked and reissued.
# See https://cloud.google.com/docs/authentication/api-keys
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))
model = genai.GenerativeModel('gemini-pro')


# Convert a list of Tesseract language codes into a pytesseract language string
def ocr_lang(lang_list):
    """Join language codes with ``+`` for use as pytesseract's ``lang`` argument.

    Args:
        lang_list: Sequence of Tesseract language codes, e.g. ``["chi_sim", "eng"]``.

    Returns:
        The codes joined by ``+`` (``"chi_sim+eng"``); a single code is returned
        unchanged and an empty sequence yields ``""``.
    """
    # str.join builds a new string, so the caller's list is left untouched
    # (the previous implementation inserted "+" entries into it in place).
    return "+".join(lang_list)


# OCR via Tesseract
def ocr_tesseract(img, languages):
    """Extract text from an image with Tesseract.

    Args:
        img: The image to recognize, or ``None`` when no image was supplied.
        languages: List of Tesseract language codes to recognize with.

    Returns:
        The recognized text, or an empty string when ``img`` is ``None``.
    """
    # Nothing uploaded: return empty text instead of letting pytesseract raise.
    if img is None:
        return ""

    ocr_str = pytesseract.image_to_string(img, lang=ocr_lang(languages))
    return ocr_str


# Clear
def clear_content():
    """Return ``None``; used as a click callback whose result replaces a component's value."""
    return None


import pyperclip

# Copy to clipboard
def cp_text(input_text):
    """Copy ``input_text`` to the clipboard on a best-effort basis.

    Any exception raised while copying is reported on stdout and swallowed,
    so the function always returns ``None``.
    """
    try:
        pyperclip.copy(input_text)
    except Exception as err:
        # Same two output lines as before: the notice, then the exception text.
        print("Error occurred while copying to clipboard", err, sep="\n")

# Clear the clipboard
def cp_clear():
    """Empty the clipboard on a best-effort basis.

    pyperclip has no ``clear`` function, so the clipboard is emptied by
    copying an empty string. Failures are reported on stdout and swallowed,
    matching the error handling in ``cp_text``.
    """
    try:
        pyperclip.copy("")
    except Exception as e:
        print("Error occurred while clearing the clipboard")
        print(e)

# Translate extracted text with the Gemini model
def translate(input_text, inputs_transStyle):
    """Reformat ``input_text`` and translate it into the requested language.

    Args:
        input_text: Text to translate (typically the OCR output); may be ``None``.
        inputs_transStyle: Name of the target language, inserted into the prompt.

    Returns:
        The model's response text, or a system-prompt message when there is
        nothing to translate.
    """
    # OCR of a blank image can yield only whitespace (e.g. a form feed), so
    # whitespace-only input is treated the same as empty input.
    if input_text is None or not input_text.strip():
        return "System prompt: There is no content to translate!"

    # Build the instruction sent to the model together with the text
    prompt = f"Please reformat the following article to have clear paragraph breaks and correct punctuation, and then translate it into {inputs_transStyle}. In the translation, do not display the original text, fictional content, or any repeated content. Ensure that the original meaning and context are preserved as much as possible."
    response = model.generate_content([prompt, input_text])
    return response.text

def main():
    """Build the Gradio Blocks UI, wire up the button callbacks, and launch the app.

    The page has two sections: step 01 (image upload, OCR language selection,
    extracted text, target-language dropdown) and step 02 (translated text and
    clipboard buttons).
    """

    with gr.Blocks(css='style.css') as ocr_tr:
        gr.Markdown(OCR_TR_DESCRIPTION)

        # -------------- OCR text extraction --------------
        with gr.Box():

            with gr.Row():
                gr.Markdown("### Step 01: Text Extraction")

            with gr.Row():
                with gr.Column():
                    with gr.Row():
                        inputs_img = gr.Image(image_mode="RGB", source="upload", type="pil", label="image")
                    with gr.Row():
                        inputs_lang = gr.CheckboxGroup(choices=["chi_sim", "eng"],
                                                       type="value",
                                                       value=['eng'],
                                                       label='language')

                    with gr.Row():
                        clear_img_btn = gr.Button('Clear')
                        ocr_btn = gr.Button(value='OCR Extraction', variant="primary")

                with gr.Column():
                    with gr.Row():
                        outputs_text = gr.Textbox(label="Extract content", lines=20)
                    # Use the top-level gr.Dropdown (with `value=`) like the other
                    # components, instead of the deprecated gr.inputs namespace.
                    inputs_transStyle = gr.Dropdown(choices=["Chinese (Simplified)", "Chinese (Traditional)", "English", "Japanese", "Korean"],
                                                    value="Chinese (Simplified)", label='translation mode')
                    with gr.Row():
                        clear_text_btn = gr.Button('Clear')
                        translate_btn = gr.Button(value='Translate', variant="primary")

            with gr.Row():
                pass

        # -------------- Translation --------------
        with gr.Box():

            with gr.Row():
                gr.Markdown("### Step 02: Translation")

            with gr.Row():
                outputs_tr_text = gr.Textbox(label="Translate Content", lines=20)

            with gr.Row():
                cp_clear_btn = gr.Button(value='Clear Clipboard')
                cp_btn = gr.Button(value='Copy to clipboard', variant="primary")

        # ---------------------- OCR Tesseract ----------------------
        ocr_btn.click(fn=ocr_tesseract, inputs=[inputs_img, inputs_lang], outputs=[
            outputs_text,])
        clear_img_btn.click(fn=clear_content, inputs=[], outputs=[inputs_img])

        # ---------------------- Translation ----------------------
        translate_btn.click(fn=translate, inputs=[outputs_text, inputs_transStyle], outputs=[outputs_tr_text])
        clear_text_btn.click(fn=clear_content, inputs=[], outputs=[outputs_text])

        # ---------------------- Copy to clipboard ----------------------
        cp_btn.click(fn=cp_text, inputs=[outputs_tr_text], outputs=[])
        cp_clear_btn.click(fn=cp_clear, inputs=[], outputs=[])

    ocr_tr.launch(inbrowser=True, share=True)

# Start the app only when this file is run as a script, not when it is imported.
if __name__ == '__main__':
    main()