Update app.py

app.py CHANGED
@@ -51,14 +51,14 @@ def process_pdf_file(file_path):
        page_text = page.get_text("text")
        if page_text.strip():
            text += f"Page {page_num + 1}:\n{page_text}\n\n"
-
+
        # Render page as an image with a zoom factor
        zoom = 3
        mat = fitz.Matrix(zoom, zoom)
        pix = page.get_pixmap(matrix=mat, alpha=False)
        img_data = pix.tobytes("png")
        img = Image.open(io.BytesIO(img_data)).convert("RGB")
-
+
        # Resize if image is too large
        max_size = 1600
        if max(img.size) > max_size:
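For readers who want to run the page-rendering logic from this hunk outside the Space, a minimal self-contained sketch follows. It assumes PyMuPDF (`fitz`) and Pillow; the function name `render_pages`, the `pdf_path` argument, and the LANCZOS resampling choice are illustrative and not taken from the repository.

```python
# Minimal sketch of the page-rendering loop this hunk touches, assuming PyMuPDF
# (fitz) and Pillow. The function name and parameters are illustrative only.
import io

import fitz  # PyMuPDF
from PIL import Image


def render_pages(pdf_path, zoom=3, max_size=1600):
    """Rasterize each page of a PDF and downscale oversized renders."""
    doc = fitz.open(pdf_path)
    images = []
    for page in doc:
        # Render the page at the requested zoom factor, without an alpha channel.
        mat = fitz.Matrix(zoom, zoom)
        pix = page.get_pixmap(matrix=mat, alpha=False)
        img = Image.open(io.BytesIO(pix.tobytes("png"))).convert("RGB")
        # Resize if the longer side exceeds max_size, keeping the aspect ratio.
        if max(img.size) > max_size:
            ratio = max_size / max(img.size)
            new_size = (int(img.width * ratio), int(img.height * ratio))
            img = img.resize(new_size, Image.Resampling.LANCZOS)
        images.append(img)
    doc.close()
    return images
```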
@@ -83,7 +83,7 @@ def process_uploaded_file(file):
    doc_state.clear()
    if file is None:
        return "No file uploaded. Please upload a file."
-
+
    # Get the file path from the Gradio upload (may be a dict or file-like object)
    if isinstance(file, dict):
        file_path = file["name"]
@@ -91,7 +91,7 @@ def process_uploaded_file(file):
        file_path = file.name
    file_ext = file_path.lower().split('.')[-1]
    image_extensions = {'png', 'jpg', 'jpeg', 'gif', 'bmp', 'webp'}
-
+
    if file_ext == 'pdf':
        doc_state.doc_type = 'pdf'
        try:
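The two hunks above normalise the Gradio upload before branching on the file extension. A hedged sketch of that normalisation follows; the helper names `get_upload_path` and `classify_upload` are invented for illustration, and the plain-string branch is an assumption about newer Gradio versions that does not appear in the diff.

```python
# Hedged sketch of the upload normalisation shown in the hunks above.
def get_upload_path(file):
    if isinstance(file, dict):   # older Gradio payloads: {"name": "/tmp/...", ...}
        return file["name"]
    if isinstance(file, str):    # some Gradio versions pass the temp path directly
        return file
    return file.name             # tempfile-like object exposing .name


def classify_upload(file_path):
    image_extensions = {'png', 'jpg', 'jpeg', 'gif', 'bmp', 'webp'}
    ext = file_path.lower().split('.')[-1]
    if ext == 'pdf':
        return 'pdf'
    if ext in image_extensions:
        return 'image'
    return 'unsupported'
```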
@@ -121,7 +121,7 @@ def process_uploaded_file(file):
 # -------------------------------
 # Bot Streaming Function Using the Multimodal API
 # -------------------------------
-def bot_streaming(prompt_option, user_message, max_new_tokens=8192):
+def bot_streaming(model_option, prompt_option, user_message, max_new_tokens=8192):
    """
    Build a multimodal message payload and call the inference API.
    The payload includes:
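The new `model_option` parameter is prepended to the signature, so the Gradio `inputs` list must supply the model dropdown first (see the wiring change near the end of this diff). A trivial sketch of the positional mapping, with made-up values:

```python
# Illustrative positional mapping only; the values are invented, not from a live run.
def bot_streaming(model_option, prompt_option, user_message, max_new_tokens=8192):
    return model_option, prompt_option, user_message, max_new_tokens


# inputs=[model_dropdown, prompt_dropdown, user_message_input] arrive in this order;
# max_new_tokens keeps its default because no component is bound to it.
print(bot_streaming("qwen/qwen-vl-plus:free", "Default", "Summarize the document"))
```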
@@ -576,15 +576,15 @@ This comprehensive system prompt provides a strong foundation for building a pow
            """
        )
    }
-
+
    # Select the appropriate prompt
    selected_prompt = prompts.get(prompt_option, "Invalid prompt selected.")
    full_prompt = selected_prompt
-
+
    # Append the user-provided message, if any
    if user_message and user_message.strip():
        full_prompt += "\nUser Message:\n" + user_message
-
+
    # Append document context if available
    if doc_state.current_doc_images and doc_state.current_doc_text:
        full_prompt += "\nDocument context:\n" + doc_state.current_doc_text
@@ -602,7 +602,7 @@ This comprehensive system prompt provides a strong foundation for building a pow
            ]
        }
    ]
-
+
    # If an image is available, encode it as a data URI and append it as an image_url message.
    if doc_state.current_doc_images:
        buffered = io.BytesIO()
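This hunk leads into the image-encoding step: the page image is serialised to PNG, base64-encoded, and attached as an OpenAI-style `image_url` part. A self-contained sketch of that pattern, assuming a Pillow image; the helper name `image_to_data_uri` is invented here.

```python
# Sketch of the data-URI step referenced above, assuming a Pillow image.
import base64
import io

from PIL import Image


def image_to_data_uri(img: Image.Image) -> str:
    buffered = io.BytesIO()
    img.save(buffered, format="PNG")
    encoded = base64.b64encode(buffered.getvalue()).decode("utf-8")
    return f"data:image/png;base64,{encoded}"


# Appended to the user turn as an OpenAI-style image part:
# {"type": "image_url", "image_url": {"url": image_to_data_uri(img)}}
```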
@@ -614,22 +614,23 @@ This comprehensive system prompt provides a strong foundation for building a pow
                "type": "image_url",
                "image_url": {"url": data_uri}
            })
-
+
        # Call the inference API with streaming enabled.
        stream = client.chat.completions.create(
-            model=
+            model=model_option,  # Use the selected model here
            messages=messages,
            max_tokens=max_new_tokens,
            stream=True
        )
-
+
        buffer = ""
        for chunk in stream:
            # The response structure is similar to the reference: each chunk contains a delta.
            delta = chunk.choices[0].delta.content
-
-
-
+            if delta is not None:  # Check if delta is not None
+                buffer += delta
+                time.sleep(0.01)
+                yield buffer

    except Exception as e:
        logger.error(f"Error in bot_streaming: {str(e)}")
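The substantive changes in this hunk are the `model=model_option` argument and the guarded streaming loop. A hedged sketch of the whole call pattern is below; the `base_url` and the `OPENROUTER_API_KEY` variable are assumptions (the diff only shows an OpenAI-compatible `client` and OpenRouter-style model IDs), not configuration taken from the Space.

```python
# Hedged sketch of the streaming call. base_url and OPENROUTER_API_KEY are
# assumptions; the diff does not show how the client is constructed.
import os
import time

from openai import OpenAI

client = OpenAI(
    base_url="https://openrouter.ai/api/v1",   # assumed endpoint
    api_key=os.environ["OPENROUTER_API_KEY"],  # placeholder variable name
)


def stream_completion(model_option, messages, max_new_tokens=8192):
    stream = client.chat.completions.create(
        model=model_option,       # the model ID selected in the dropdown
        messages=messages,
        max_tokens=max_new_tokens,
        stream=True,
    )
    buffer = ""
    for chunk in stream:
        delta = chunk.choices[0].delta.content
        if delta is not None:     # final chunks may carry a None delta
            buffer += delta
            time.sleep(0.01)      # brief pause so the UI updates smoothly
            yield buffer
```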
@@ -644,8 +645,8 @@ def clear_context():
 # Create the Gradio Interface
 # -------------------------------
 with gr.Blocks() as demo:
-    gr.Markdown("# Document Analyzer with
-    gr.Markdown("Upload a PDF or image (PNG, JPG, JPEG, GIF, BMP, WEBP)
+    gr.Markdown("# Document Analyzer with Model and Prompt Selection")
+    gr.Markdown("Upload a PDF or image (PNG, JPG, JPEG, GIF, BMP, WEBP), select a model and a prompt to analyze its contents.")

    with gr.Row():
        file_upload = gr.File(
@@ -655,12 +656,25 @@ with gr.Blocks() as demo:
    upload_status = gr.Textbox(label="Upload Status", interactive=True)

    with gr.Row():
+        model_dropdown = gr.Dropdown(
+            label="Select Model",
+            choices=[
+                "google/gemini-2.0-pro-exp-02-05:free",
+                "meta-llama/llama-3.2-11b-vision-instruct:free",
+                "qwen/qwen-vl-plus:free",
+                "google/gemini-2.0-flash-lite-preview-02-05:free",
+                "google/gemini-2.0-flash-thinking-exp:free",
+                "qwen/qwen2.5-vl-72b-instruct:free"
+                # "openai/gpt-4-vision-preview"  # Uncomment if you have access and want to include
+            ],
+            value="google/gemini-2.0-pro-exp-02-05:free"  # Default model
+        )
        prompt_dropdown = gr.Dropdown(
            label="Select Prompt",
            choices=["Default","Structured Software Tester","UserStoryCraft","APIDoc","DBModel","RiskAssess","CodeComment","RequirementCraft","DesignDoc","DiagramGen","TechWrite","UIUXReview","AccessibilityCheck","RiskAssess"],
            value="Default"
        )
-
+
    # Additional textbox for user messages
    with gr.Row():
        user_message_input = gr.Textbox(
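The new `model_dropdown` hands its selected choice to the handler as a plain string, so `bot_streaming` receives a model ID such as `google/gemini-2.0-pro-exp-02-05:free` with no further lookup. A minimal sketch of that behaviour with a shortened choices list:

```python
# Minimal sketch: gr.Dropdown passes the selected choice to the handler as a string.
import gradio as gr


def show_selection(model_option):
    return f"Will call the API with model={model_option}"


with gr.Blocks() as sketch:
    model_dropdown = gr.Dropdown(
        label="Select Model",
        choices=[
            "google/gemini-2.0-pro-exp-02-05:free",
            "qwen/qwen-vl-plus:free",
        ],
        value="google/gemini-2.0-pro-exp-02-05:free",
    )
    out = gr.Textbox(label="Output")
    model_dropdown.change(fn=show_selection, inputs=[model_dropdown], outputs=[out])
```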
@@ -668,16 +682,16 @@ with gr.Blocks() as demo:
            placeholder="Enter any additional instructions or context here (optional)",
            lines=4
        )
-
+
    with gr.Row():
        generate_btn = gr.Button("Generate")
        clear_btn = gr.Button("Clear Document Context")
-
+
    output_text = gr.Textbox(label="Output", interactive=False, lines=15)
-
+
    file_upload.change(fn=process_uploaded_file, inputs=[file_upload], outputs=[upload_status])
-    # Pass
-    generate_btn.click(fn=bot_streaming, inputs=[prompt_dropdown, user_message_input], outputs=[output_text])
+    # Pass model, prompt and user message to bot_streaming
+    generate_btn.click(fn=bot_streaming, inputs=[model_dropdown, prompt_dropdown, user_message_input], outputs=[output_text])
    clear_btn.click(fn=clear_context, outputs=[upload_status])

-demo.launch(debug=True)
+demo.launch(debug=True)
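Because `bot_streaming` is a generator that yields the growing buffer, wiring it to `generate_btn.click` makes Gradio stream each partial result into the output textbox. A runnable sketch of that wiring, with a stub generator standing in for the real handler:

```python
# Runnable sketch of the event wiring above; the stub stands in for bot_streaming.
import time

import gradio as gr


def bot_streaming_stub(model_option, prompt_option, user_message):
    text = f"[{model_option} / {prompt_option}] {user_message or ''}"
    buffer = ""
    for ch in text:
        buffer += ch
        time.sleep(0.01)
        yield buffer  # each yield replaces the textbox contents


with gr.Blocks() as demo:
    model_dropdown = gr.Dropdown(choices=["model-a", "model-b"], value="model-a", label="Select Model")
    prompt_dropdown = gr.Dropdown(choices=["Default"], value="Default", label="Select Prompt")
    user_message_input = gr.Textbox(label="Message")
    generate_btn = gr.Button("Generate")
    output_text = gr.Textbox(label="Output", interactive=False)
    generate_btn.click(
        fn=bot_streaming_stub,
        inputs=[model_dropdown, prompt_dropdown, user_message_input],
        outputs=[output_text],
    )

# demo.launch(debug=True)  # uncomment to try the sketch locally
```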