Spaces:

ikraamkb
/

Summarization

Sleeping

App Files Community

ikraamkb committed on Apr 19

Commit

26435ba

verified ·

1 Parent(s): 9af5fdb

Update appImage.py

Browse files

Files changed (1) hide show

appImage.py +18 -142

appImage.py CHANGED Viewed

@@ -1,22 +1,16 @@
 import gradio as gr
-from transformers import AutoProcessor, AutoModelForCausalLM, pipeline
-import easyocr
-from fastapi import FastAPI
-from fastapi.responses import RedirectResponse, FileResponse, JSONResponse
-import tempfile
-import os
-from gtts import gTTS
-from fpdf import FPDF
-import datetime
 from PIL import Image
 import torch
-# Initialize components
 app = FastAPI()
 # Load models - Using microsoft/git-large-coco
 try:
-    # Try loading the better model first
     processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
     git_model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
     print("Successfully loaded microsoft/git-large-coco model")
@@ -26,9 +20,6 @@ except Exception as e:
     captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
     USE_GIT = False
-# Initialize EasyOCR
-reader = easyocr.Reader(['en', 'fr'])  # English and French OCR
 def generate_caption(image_path):
     """Generate caption using the best available model"""
     try:
@@ -44,152 +35,37 @@ def generate_caption(image_path):
         print(f"Caption generation error: {e}")
         return "Could not generate caption"
-def analyze_image(image_path):
-    """Process image with both captioning and OCR"""
-    try:
-        # Generate image caption
-        caption = generate_caption(image_path)
-        # Extract text with EasyOCR
-        ocr_result = reader.readtext(image_path, detail=0)
-        extracted_text = "\n".join(ocr_result) if ocr_result else "No text detected"
-        return {
-            "caption": caption,
-            "extracted_text": extracted_text
-        }
-    except Exception as e:
-        return {"error": str(e)}
-def text_to_speech(text: str) -> str:
-    """Convert text to speech"""
-    try:
-        tts = gTTS(text)
-        temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
-        tts.save(temp_audio.name)
-        return temp_audio.name
-    except Exception as e:
-        print(f"Text-to-speech error: {e}")
-        return ""
-def create_pdf(content: dict, original_filename: str) -> str:
-    """Create PDF report"""
-    try:
-        pdf = FPDF()
-        pdf.add_page()
-        pdf.set_font("Arial", size=12)
-        # Title
-        pdf.set_font("Arial", 'B', 16)
-        pdf.cell(200, 10, txt="Image Analysis Report", ln=1, align='C')
-        pdf.set_font("Arial", size=12)
-        # Metadata
-        pdf.cell(200, 10, txt=f"Original file: {original_filename}", ln=1)
-        pdf.cell(200, 10, txt=f"Generated on: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", ln=1)
-        pdf.ln(10)
-        # Caption
-        pdf.set_font("", 'B')
-        pdf.cell(200, 10, txt="Image Caption:", ln=1)
-        pdf.set_font("")
-        pdf.multi_cell(0, 10, txt=content['caption'])
-        pdf.ln(5)
-        # Extracted Text
-        pdf.set_font("", 'B')
-        pdf.cell(200, 10, txt="Extracted Text:", ln=1)
-        pdf.set_font("")
-        pdf.multi_cell(0, 10, txt=content['extracted_text'])
-        temp_pdf = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
-        pdf.output(temp_pdf.name)
-        return temp_pdf.name
-    except Exception as e:
-        print(f"PDF creation error: {e}")
-        return ""
-def process_image(file_path: str, enable_tts: bool):
     """Handle image processing for Gradio interface"""
     if not file_path:
-        return "Please upload an image first", "Ready", None, None
     try:
-        original_filename = os.path.basename(file_path)
-        # Analyze image
-        result = analyze_image(file_path)
-        if "error" in result:
-            return result["error"], "Error", None, None
-        # Format output
-        output_text = f"📷 Image Caption:\n{result['caption']}\n\n✍️ Extracted Text:\n{result['extracted_text']}"
-        # Generate audio
-        audio_path = text_to_speech(f"Image caption: {result['caption']}. Extracted text: {result['extracted_text']}") if enable_tts else None
-        # Generate PDF
-        pdf_path = create_pdf(result, original_filename)
-        return output_text, "Analysis complete", audio_path, pdf_path
     except Exception as e:
-        return f"Analysis error: {str(e)}", "Error", None, None
 # Gradio Interface
-with gr.Blocks(title="Image Analysis Service", theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 🖼️ Image Analysis Service")
-    gr.Markdown("Upload an image to get automatic captioning and text extraction")
     with gr.Row():
         with gr.Column():
             image_input = gr.Image(label="Upload Image", type="filepath")
-            tts_checkbox = gr.Checkbox(
-                label="Enable Text-to-Speech",
-                value=False
-            )
-            analyze_btn = gr.Button("Analyze Image", variant="primary")
         with gr.Column():
-            output = gr.Textbox(label="Analysis Results", lines=10)
-            status = gr.Textbox(label="Status", interactive=False)
-            audio_output = gr.Audio(label="Audio Summary", visible=False)
-            pdf_download = gr.File(label="Download Report", visible=False)
-    def toggle_audio_visibility(enable_tts):
-        return gr.Audio(visible=enable_tts)
-    def update_ui(result, status, audio_path, pdf_path):
-        return (
-            result,
-            status,
-            gr.Audio(visible=audio_path is not None, value=audio_path),
-            gr.File(visible=pdf_path is not None, value=pdf_path)
-        )
-    tts_checkbox.change(
-        fn=toggle_audio_visibility,
-        inputs=tts_checkbox,
-        outputs=audio_output
-    )
     analyze_btn.click(
         fn=process_image,
-        inputs=[image_input, tts_checkbox],
-        outputs=[output, status, audio_output, pdf_download]
-    ).then(
-        fn=update_ui,
-        inputs=[output, status, audio_output, pdf_download],
-        outputs=[output, status, audio_output, pdf_download]
     )
-# FastAPI setup
-@app.get("/files/{file_name}")
-async def get_file(file_name: str):
-    file_path = os.path.join(tempfile.gettempdir(), file_name)
-    if os.path.exists(file_path):
-        return FileResponse(file_path)
-    return JSONResponse({"error": "File not found"}, status_code=404)
 app = gr.mount_gradio_app(app, demo, path="/")
 @app.get("/")

 import gradio as gr
+from transformers import AutoProcessor, AutoModelForCausalLM
 from PIL import Image
 import torch
+from fastapi import FastAPI
+from fastapi.responses import RedirectResponse
+# Initialize FastAPI
 app = FastAPI()
 # Load models - Using microsoft/git-large-coco
 try:
+    # Load the better model
     processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
     git_model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
     print("Successfully loaded microsoft/git-large-coco model")
     captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
     USE_GIT = False
 def generate_caption(image_path):
     """Generate caption using the best available model"""
     try:
         print(f"Caption generation error: {e}")
         return "Could not generate caption"
+def process_image(file_path: str):
     """Handle image processing for Gradio interface"""
     if not file_path:
+        return "Please upload an image first"
     try:
+        caption = generate_caption(file_path)
+        return f"📷 Image Caption:\n{caption}"
     except Exception as e:
+        return f"Error processing image: {str(e)}"
 # Gradio Interface
+with gr.Blocks(title="Image Captioning Service", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🖼️ Image Captioning Service")
+    gr.Markdown("Upload an image to get automatic captioning")
     with gr.Row():
         with gr.Column():
             image_input = gr.Image(label="Upload Image", type="filepath")
+            analyze_btn = gr.Button("Generate Caption", variant="primary")
         with gr.Column():
+            output = gr.Textbox(label="Caption Result", lines=5)
     analyze_btn.click(
         fn=process_image,
+        inputs=[image_input],
+        outputs=[output]
     )
+# Mount Gradio app to FastAPI
 app = gr.mount_gradio_app(app, demo, path="/")
 @app.get("/")