# -*- coding: utf-8 -*-
"""saivv_prototype"""
# Import necessary libraries
import cv2 # For image processing with OpenCV
import pytesseract # For Optical Character Recognition (OCR) on receipts
import gradio as gr # For creating the Gradio interface
import speech_recognition as sr # For voice recognition
# Model setup (using transformers)
import torch
import transformers # Full namespace used for AutoConfig, AutoModelForCausalLM and pipeline
from transformers import AutoTokenizer
from langchain_community.llms import HuggingFacePipeline # LangChain wrapper around a transformers pipeline
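# Runtime requirements: pytesseract needs the Tesseract OCR binary installed on the host
# (e.g. via packages.txt on a Hugging Face Space), and recognize_google() needs internet
# access because it calls Google's Web Speech API.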
# Initialize device and model config
device = 'cpu' # Use CPU since CUDA is unavailable
print(device)
# Load GPT-2 instead of zephyr-7b-beta
model_id = 'gpt2' # GPT-2 is small enough to run on CPU
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    trust_remote_code=True,
    max_new_tokens=1024
)
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    device_map='auto',  # accelerate places the model on CPU since no GPU is available
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
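# Note: GPT-2's context window is only 1,024 tokens, so very long prompts can overflow it
# and trigger truncation warnings or errors at generation time.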
# Create the text-generation pipeline for the GPT-2 model
query_pipeline = transformers.pipeline(
    'text-generation',
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float32,  # Use float32 for CPU compatibility
    max_new_tokens=500,  # cap generated tokens; max_length is omitted because GPT-2's context is only 1,024 tokens
    # device_map is not passed again here: the model above is already placed on its device
)
llm = HuggingFacePipeline(pipeline=query_pipeline)
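# HuggingFacePipeline exposes the transformers pipeline as a LangChain LLM, so it can be
# called with a plain string prompt (e.g. llm.invoke(prompt)) and composed into chains.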
# User profile setup
user_profile = """
User Profile:
Age: 40, Gender: Non-Binary, Marital Status: Divorced, Income Level: Medium ($2733),
Education: PhD, Occupation: Manager, Residential Status: Mortgaged, Dependents: 1,
Debt: $27664, Debt-to-Income Ratio: 10.12
Spending:
Groceries: $496.0, Supplies: $454.42, Food: $341.69, Electronics: $351.92,
Home Shopping: $235.68, Others: $253.45
"""
question = "Based on this data, can I buy a Lamborghini?"
prompt = f"{user_profile}\n\nQuestion: {question}"
# Get a response from the LLM (invoke() is the current LangChain calling convention)
response = llm.invoke(prompt)
# Display the result (printed to the console/log, since this runs as a script rather than a notebook)
def colorize_text(text):
    for word, color in zip(["Reasoning", "Question", "Answer", "Total time"], ["blue", "red", "green", "magenta"]):
        text = text.replace(f"{word}:", f"\n\n**<font color='{color}'>{word}:</font>**")
    return text
full_response = f"**Question:** {question}\n\n**Answer:** {response}"
print(colorize_text(full_response))
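# The block above runs once at import time as a quick sanity check of the model;
# the Gradio app defined below is the actual user-facing interface.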
# Receipt scanning with OCR (Tesseract via pytesseract)
def scan_receipt(image):
    try:
        # Gradio supplies the image as an RGB numpy array; convert to BGR for OpenCV-style handling
        img_orig = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        options = "--psm 6"  # PSM 6: assume a single uniform block of text
        text = pytesseract.image_to_string(img_orig, config=options)
        return text.strip()
    except Exception as e:
        return f"An error occurred: {str(e)}"
# Voice expense recording: transcribe the recorded audio with the Google Web Speech API
def record_expense(audio_path):
    if audio_path is None:
        return "No audio recorded."
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_path) as source:
        audio = recognizer.record(source)
    try:
        # Transcribe the audio file (requires an internet connection)
        transcription = recognizer.recognize_google(audio)
        return transcription
    except sr.UnknownValueError:
        return "Audio not clear, please try again."
    except sr.RequestError:
        return "Could not request results; check internet connection."
# Recommendation chatbot function: answers a question in the context of the stored user profile
def recommendation_chatbot(user_input):
    prompt = f"{user_profile}\n\nQuestion: {user_input}"
    response = llm.invoke(prompt)
    return str(response)  # Ensure the result is a plain string
# Wrapper function to handle receipt scanning and voice recording
def process_inputs(image, audio):
    receipt_data = scan_receipt(image)
    expense_data = record_expense(audio)
    return receipt_data, expense_data
# Gradio Interface setup using Blocks
with gr.Blocks() as iface:
    # Centered title and description
    gr.Markdown("<h1 style='text-align: center; font-size: 2.5em; color: #2B2D42;'>SAIVV</h1>")
    gr.Markdown("<p style='text-align: center; font-size: 1.2em; color: #8D99AE;'>An AI-powered fintech solution for tracking expenses and managing finances.</p>")
    # Profile and Spending section with improved formatting
    with gr.Row():
        gr.Markdown("""
        <div style="padding: 20px; border: 1px solid #2B2D42; border-radius: 10px; background-color: #EDF2F4;">
            <h2 style="color: #2B2D42; margin-bottom: 10px;">User Profile: Mohamed</h2>
            <ul style="font-size: 1.1em; color: #2B2D42; list-style-type: none; padding: 0;">
                <li><strong>Age:</strong> 40</li>
                <li><strong>Gender:</strong> Male</li>
                <li><strong>Marital Status:</strong> Married</li>
                <li><strong>Income Level:</strong> Medium ($2733)</li>
                <li><strong>Education:</strong> PhD</li>
                <li><strong>Occupation:</strong> Manager</li>
                <li><strong>Residential Status:</strong> Mortgaged</li>
                <li><strong>Dependents:</strong> 1</li>
                <li><strong>Debt:</strong> $27,664</li>
                <li><strong>Debt-to-Income Ratio:</strong> 10.12%</li>
            </ul>
        </div>
        """, elem_id="user-profile")
        gr.Markdown("""
        <div style="padding: 20px; border: 1px solid #2B2D42; border-radius: 10px; background-color: #EDF2F4;">
            <h2 style="color: #2B2D42; margin-bottom: 10px;">Spending</h2>
            <ul style="font-size: 1.1em; color: #2B2D42; list-style-type: none; padding: 0;">
                <li><strong>Groceries:</strong> $496.0</li>
                <li><strong>Supplies:</strong> $454.42</li>
                <li><strong>Food:</strong> $341.69</li>
                <li><strong>Electronics:</strong> $351.92</li>
                <li><strong>Home Shopping:</strong> $235.68</li>
                <li><strong>Others:</strong> $253.45</li>
            </ul>
        </div>
        """, elem_id="spending-info")
    # Input and output components for receipt scanning and expense recording
    with gr.Row():
        receipt_input = gr.Image(type="numpy", label="Capture Receipt")
        audio_input = gr.Audio(type="filepath", label="Record Expense by Voice")
    # Outputs for receipt and expense recording
    receipt_output = gr.Textbox(label="Receipt Data")
    expense_output = gr.Textbox(label="Recorded Expense Data")
    # Button to process inputs
    submit_btn = gr.Button("Submit")
    submit_btn.click(process_inputs, inputs=[receipt_input, audio_input], outputs=[receipt_output, expense_output])
# Launch the interface
iface.launch(debug=True)
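# debug=True keeps verbose error output in the console; on a Hugging Face Space this
# script is executed directly, so launch() is what starts the web app.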