Spaces:

eagle0504
/

IDP-Demo

Sleeping

App Files Files Community

IDP-Demo / utils /helpers.py

eagle0504

app updated

6738563 10 months ago

raw

history blame contribute delete

9.3 kB

	import base64
	import io
	import json
	import os
	from typing import Any, Dict, List

	import chromadb
	import google.generativeai as palm
	import matplotlib.patches as patches
	import matplotlib.pyplot as plt
	import pandas as pd
	import requests
	import streamlit as st
	from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
	from langchain.text_splitter import (
	RecursiveCharacterTextSplitter,
	SentenceTransformersTokenTextSplitter,
	)
	from PIL import Image, ImageDraw, ImageFont
	from pypdf import PdfReader

	# API key for Google's generative AI service, read from the PALM_API_KEY
	# environment variable when this module is imported. Indexing os.environ
	# raises KeyError if the variable is not set, so it must be defined before
	# import. Alternative source kept for reference (Streamlit secrets):
	# api_key = st.secrets["PALM_API_KEY"]
	api_key = os.environ["PALM_API_KEY"]
	# Configure the google.generativeai client (imported as `palm`) with the key.
	palm.configure(api_key=api_key)


	def convert_image_to_bytes(image):
	    """Encode an image as JPEG and return the raw bytes (e.g. for download).

	    Args:
	        image: An object exposing the PIL ``save(fp, format=...)`` and
	            ``convert(mode)`` methods.

	    Returns:
	        bytes: The JPEG-encoded image data.

	    Raises:
	        OSError: If the image cannot be written as JPEG even after being
	            converted to RGB.
	    """
	    buffered = io.BytesIO()
	    try:
	        image.save(buffered, format="JPEG")
	    except OSError:
	        # JPEG cannot store some modes (e.g. RGBA or palette images from PNG
	        # uploads). Retry on an RGB copy, using a fresh buffer so that any
	        # partial output from the failed attempt is discarded.
	        buffered = io.BytesIO()
	        image.convert("RGB").save(buffered, format="JPEG")
	    return buffered.getvalue()


	def resize_image(image, target_width=512):
	    """Resize an image to a fixed width, scaling the height proportionally.

	    Args:
	        image: An object exposing ``width``, ``height`` and ``resize(size)``
	            (the PIL image interface).
	        target_width (int): Width of the resized image in pixels. Defaults to
	            512, the value that was previously hard-coded.

	    Returns:
	        Whatever ``image.resize`` returns for the ``(width, height)`` tuple.
	    """
	    # Truncate like the original computation, but never let the height drop
	    # to 0: extremely wide inputs would otherwise produce an invalid size.
	    scaled_height = max(1, int(image.height * target_width / image.width))
	    return image.resize((target_width, scaled_height))


	def convert_image_to_base64(image):
	    """Encode an image as JPEG and return it as a base64 text string.

	    Args:
	        image: An object exposing the PIL ``save(fp, format=...)`` and
	            ``convert(mode)`` methods.

	    Returns:
	        str: Base64 encoding of the JPEG bytes.

	    Raises:
	        OSError: If the image cannot be written as JPEG even after being
	            converted to RGB.
	    """
	    buffered = io.BytesIO()
	    try:
	        image.save(buffered, format="JPEG")
	    except OSError:
	        # JPEG cannot store some modes (e.g. RGBA or palette images from PNG
	        # uploads). Retry on an RGB copy, using a fresh buffer so that any
	        # partial output from the failed attempt is discarded.
	        buffered = io.BytesIO()
	        image.convert("RGB").save(buffered, format="JPEG")
	    return base64.b64encode(buffered.getvalue()).decode()


	def call_palm(prompt: str) -> str:
	    """Send a prompt to the PaLM text model and return the generated text.

	    Args:
	        prompt (str): The text prompt passed to ``palm.generate_text``.

	    Returns:
	        str: The ``result`` attribute of the completion returned by the API.
	    """
	    # Fixed generation settings: text-bison model, temperature 0 and an
	    # 800-token output cap.
	    generation_settings = {
	        "model": "models/text-bison-001",
	        "prompt": prompt,
	        "temperature": 0,
	        "max_output_tokens": 800,
	    }
	    return palm.generate_text(**generation_settings).result


	def call_gemini_api(
	    image_base64, api_key=api_key, prompt="What is this picture?", timeout=60
	):
	    """Ask the Gemini vision model a question about a base64-encoded JPEG.

	    Args:
	        image_base64: Base64 text of the JPEG image sent as inline data.
	        api_key: API key sent as the ``key`` query parameter. Defaults to the
	            module-level ``api_key``.
	        prompt (str): The text part of the request.
	        timeout: Seconds to wait for the server before giving up. Without a
	            timeout a stalled connection would block the caller forever.

	    Returns:
	        The JSON-decoded body of the HTTP response.

	    Raises:
	        requests.exceptions.Timeout: If the server does not respond within
	            ``timeout`` seconds.
	    """
	    endpoint = (
	        "https://generativelanguage.googleapis.com/v1beta/models/"
	        "gemini-pro-vision:generateContent"
	    )
	    headers = {
	        "Content-Type": "application/json",
	    }
	    data = {
	        "contents": [
	            {
	                "parts": [
	                    {"text": prompt},
	                    {"inline_data": {"mime_type": "image/jpeg", "data": image_base64}},
	                ]
	            }
	        ]
	    }
	    response = requests.post(
	        endpoint,
	        # Let requests build and URL-encode the query string instead of
	        # splicing the key into the URL by hand.
	        params={"key": api_key},
	        headers=headers,
	        json=data,
	        timeout=timeout,
	    )
	    return response.json()


	def safely_get_text(response):
	    """Extract the generated text from a Gemini ``generateContent`` response.

	    The text is read from ``response["candidates"][0]["content"]["parts"][0]["text"]``.

	    Args:
	        response: The JSON-decoded response, e.g. the value returned by
	            ``call_gemini_api``.

	    Returns:
	        The text of the first part of the first candidate, or None if that
	        path does not exist in ``response``.
	    """
	    try:
	        return response["candidates"][0]["content"]["parts"][0]["text"]
	    except (KeyError, IndexError, TypeError) as e:
	        # Missing key, empty list or a non-subscriptable value (e.g. None or
	        # an error payload): report it and fall through to the default.
	        print(f"An error occurred: {e}")

	    # Return None if the path does not exist
	    return None


	def post_request_and_parse_response(
	    url: str, payload: Dict[str, Any], timeout: float = 60.0
	) -> Dict[str, Any]:
	    """
	    Sends a POST request to the specified URL with the given payload,
	    then parses the byte response to a dictionary.

	    Args:
	        url (str): The URL to which the POST request is sent.
	        payload (Dict[str, Any]): The payload to send as the JSON body.
	        timeout (float): Seconds to wait for the server before giving up.
	            Without a timeout a stalled connection would block forever.

	    Returns:
	        Dict[str, Any]: The parsed dictionary from the response.

	    Raises:
	        requests.exceptions.Timeout: If the server does not respond within
	            ``timeout`` seconds.
	        json.JSONDecodeError: If the response body is not valid JSON.
	    """
	    # Set headers for the POST request
	    headers = {"Content-Type": "application/json"}

	    # Send the POST request and get the response
	    response = requests.post(url, json=payload, headers=headers, timeout=timeout)

	    # Decode the raw response bytes as UTF-8 and parse the JSON text
	    decoded_string = response.content.decode("utf-8")
	    return json.loads(decoded_string)


	def extract_line_items(input_data: Dict[str, Any]) -> List[Dict[str, Any]]:
	    """
	    Returns the entries whose "BlockType" is "LINE" from a response dictionary.

	    Args:
	        input_data (Dict[str, Any]): Dictionary whose 'body' value is a JSON
	            string encoding a list of items. A missing 'body' is treated as
	            an empty list.

	    Returns:
	        List[Dict[str, Any]]: The matching items, in their original order.
	    """
	    # The body is JSON text, so it has to be parsed before filtering.
	    raw_body = input_data.get("body", "[]")
	    return [
	        entry for entry in json.loads(raw_body) if entry.get("BlockType") == "LINE"
	    ]


	def rag(query: str, retrieved_documents: list, api_key: str = api_key) -> str:
	    """
	    Answers a query using retrieved documents as supporting information.

	    The documents are joined with blank lines, combined with the query into a
	    single prompt, and sent to the model through ``call_palm``.

	    Args:
	        query (str): The user's query or question.
	        retrieved_documents (list): Strings retrieved as relevant to the query.
	        api_key (str): Not used by this function; defaults to the module-level
	            'api_key'.

	    Returns:
	        str: The value returned by ``call_palm``, without post-processing.
	    """
	    # Separate the documents with two newlines inside the prompt.
	    information = "\n\n".join(retrieved_documents)

	    # One prompt carrying both the question and the supporting information.
	    prompt_text = f"Question: {query}. \n Information: {information}"

	    return call_palm(prompt=prompt_text)


	def displayPDF(file: str) -> None:
	    """
	    Embeds a PDF file in the Streamlit page.

	    Parameters:
	    - file (str): The path to the PDF file to be displayed.
	    """
	    # Read the whole file as bytes; the handle is closed when the block exits
	    with open(file, "rb") as pdf_handle:
	        raw_pdf = pdf_handle.read()

	    # Inline the document as a base64 data URI inside an <embed> tag
	    base64_pdf: str = base64.b64encode(raw_pdf).decode("utf-8")
	    embed_html: str = f'<embed src="data:application/pdf;base64,{base64_pdf}" width="700" height="1000" type="application/pdf">'

	    # Raw HTML is only rendered by st.markdown when unsafe_allow_html is set
	    st.markdown(embed_html, unsafe_allow_html=True)


	def draw_boxes(image: Any, predictions: List[Dict[str, Any]]) -> Any:
	    """
	    Draws bounding boxes and labels onto an image based on provided predictions.

	    Parameters:
	    - image (Any): The image to annotate, which should support the PIL drawing interface.
	    - predictions (List[Dict[str, Any]]): A list of predictions where each prediction is a dictionary
	      containing 'label', 'score', and 'box' keys. The 'box' is another dictionary with 'xmin',
	      'ymin', 'xmax', and 'ymax' as keys representing coordinates for the bounding box.

	    Returns:
	    - Any: The annotated image with bounding boxes and labels drawn on it.

	    Raises:
	    - KeyError: If a prediction or its 'box' lacks one of the keys listed above.

	    Note:
	    - The function directly modifies the input image and returns it.
	    """
	    # Create a drawing context from the image
	    draw = ImageDraw.Draw(image)
	    # Load a default font for text drawing
	    font = ImageFont.load_default()

	    # Loop through all predictions and draw boxes with labels
	    for pred in predictions:
	        label = pred["label"]
	        score = pred["score"]
	        box = pred["box"]
	        # Read the coordinates by key rather than unpacking box.values():
	        # unpacking depends on the dict's key order, which silently swaps
	        # coordinates if the box was built or deserialized in another order.
	        xmin, ymin, xmax, ymax = (
	            box[key] for key in ("xmin", "ymin", "xmax", "ymax")
	        )
	        # Draw a rectangle over the image using the box's coordinates
	        draw.rectangle([xmin, ymin, xmax, ymax], outline="green", width=1)
	        # Annotate the image with label and score at the top-left corner of the bounding box
	        draw.text((xmin, ymin), f"{label} ({score:.2f})", fill="red", font=font)

	    # Return the annotated image
	    return image


	def draw_bounding_boxes_for_textract(
	    image: Image.Image, json_data: Dict[str, Any]
	) -> Image.Image:
	    """
	    Outlines the LINE and WORD blocks of a Textract-style response on an image.

	    Args:
	        image: The PIL image to draw on; it is modified in place.
	        json_data: Dictionary whose "body" value is a JSON string encoding a
	            list of blocks. Each block that is drawn must provide
	            Geometry -> BoundingBox with Left, Top, Width and Height.

	    Returns:
	        The same image object, with a red rectangle around every LINE/WORD
	        block. It is returned unchanged (after reporting through st.error)
	        when "body" is missing or is not valid JSON.
	    """
	    canvas = ImageDraw.Draw(image)

	    # Decode the block list; a missing "body" key is reported separately below
	    try:
	        blocks = json.loads(json_data["body"]) if "body" in json_data else None
	    except json.JSONDecodeError:
	        st.error("Invalid JSON data.")
	        return image

	    if blocks is None:
	        st.error("No bounding box data found.")
	        return image

	    img_w, img_h = image.width, image.height
	    for block in blocks:
	        # Skip anything that is not a line or word block
	        if "BlockType" not in block or block["BlockType"] not in ("LINE", "WORD"):
	            continue

	        bbox = block["Geometry"]["BoundingBox"]
	        left, top, box_w, box_h = (
	            bbox[key] for key in ("Left", "Top", "Width", "Height")
	        )
	        # Box values are scaled by the image size to get pixel coordinates
	        top_left = (left * img_w, top * img_h)
	        bottom_right = ((left + box_w) * img_w, (top + box_h) * img_h)
	        canvas.rectangle([top_left, bottom_right], outline="red", width=2)

	    return image