from PIL import Image
import pytesseract
import os
import pymupdf
#import spaces
import torch
import gradio as gr
from prepare import prepare
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from langchain_community.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders import YoutubeLoader, DataFrameLoader
from langchain_community.vectorstores.utils import filter_complex_metadata
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.schema.runnable import RunnablePassthrough
from langchain_core.messages import AIMessage, HumanMessage
from langchain_community.llms import HuggingFaceEndpoint
from dotenv import load_dotenv
from huggingface_hub import InferenceApi

#zero = torch.Tensor([0]).cuda()

#@spaces.GPU
def read_pdf(file_path):
    """Extract text from a PDF; pages without a text layer fall back to Vietnamese OCR."""
    output = ''
    doc = pymupdf.open(file_path)
    for page in range(len(doc)):
        text = doc[page].get_text()
        if text:
            output += text
        else:
            image_list = doc[page].get_images()
            for image_index, img in enumerate(image_list, start=1):  # enumerate the images on the page
                xref = img[0]  # get the XREF of the image
                pix = pymupdf.Pixmap(doc, xref)  # create a Pixmap
                if pix.n - pix.alpha > 3:  # CMYK: convert to RGB first
                    pix = pymupdf.Pixmap(pymupdf.csRGB, pix)
                path = "page_{}-image_{}.png".format(page, image_index)
                pix.save(path)  # save the image as PNG
                img = Image.open(path)
                pix = None
                output += pytesseract.image_to_string(img, lang='vie') + '\n'
    return output


# Function to query the Hugging Face Inference API
#@spaces.GPU
def query_huggingface(text):
    api = InferenceApi(repo_id="google/gemma-2-9b-it", token="your_token_here")
    response = api(inputs=text)
    return response


# Gradio callback for PDF processing (the Hugging Face query path is currently disabled)
def process_file(file):
    pdf_output = read_pdf(file.name)
    #huggingface_output = query_huggingface(query)
    return pdf_output


# Create Gradio App
interface = gr.Interface(
    fn=process_file,
    inputs=[
        gr.File(label="Upload a PDF file"),
        #gr.Textbox(label="Enter your query for Hugging Face"),
    ],
    outputs=[
        gr.Textbox(label="PDF Content"),
        #gr.Textbox(label="Hugging Face Output"),
    ],
    title="PDF Processor with Hugging Face Query"
)

# Launch the Gradio App
if __name__ == "__main__":
    prepare()
    interface.launch()