first commit

- docker-compose.yml +19 -0
- main.py +420 -0
- requirements.txt +12 -0
- srs/utils.py +429 -0
docker-compose.yml
ADDED
@@ -0,0 +1,19 @@
+version: '3.8'
+services:
+  web:
+    build: .
+    ports:
+      - "80:80"
+    depends_on:
+      - redis
+    environment:
+      - REDIS_URL=redis://redis:6379
+  redis:
+    image: "redis/redis-stack-server:latest"
+    ports:
+      - "6379:6379"
+    volumes:
+      - redis_data:/data
+
+volumes:
+  redis_data:
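
Note that the compose file injects REDIS_URL into the web container, while the code below connects with a hardcoded host name. A minimal sketch of consuming the variable instead, assuming redis-py's standard Redis.from_url constructor:

    import os
    import redis

    # Read the connection string injected by docker-compose; fall back to
    # localhost when running outside the container.
    redis_url = os.environ.get("REDIS_URL", "redis://localhost:6379")
    r = redis.Redis.from_url(redis_url, decode_responses=True)
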
main.py
ADDED
@@ -0,0 +1,420 @@
+import base64
+import io
+import json
+import logging
+
+import PyPDF2
+import redis
+from fastapi import FastAPI, HTTPException, status
+from fastapi.responses import JSONResponse
+from pydantic import BaseModel, Field, validator
+from typing import List, Optional
+
+from srs.utils import functions_doc
+
+r = redis.Redis(host='redis', port=6379, db=0, password=None, decode_responses=True)
+# Request and response models
+app = FastAPI()
+
+class ServiceRemovalRequest(BaseModel):
+    token: str
+    servicename: str
+
+class Document(BaseModel):
+    token: str
+    service_name: str
+    document_name: str
+
+class ServicesResponse(BaseModel):
+    message: str
+    added_services: List[str]
+
+class RemoveDocumentsRequest(BaseModel):
+    token: str
+    service_name: str
+    document_names: List[str]
+
+class Service(BaseModel):
+    servicename: str
+
+class TokenServicesRequest(BaseModel):
+    token: str
+    services: List[Service]
+
+class TokenRequest(BaseModel):
+    token: str
+
+class AddDocumentRequest(BaseModel):
+    token: str
+    servicename: str
+    documentname: str
+
+class StoreDocumentServicesRequest(BaseModel):
+    token: str
+    service_name: str
+    document_name: str
+    file: bytes  # base64-encoded file content, sent as a string in the JSON body
+
+class DocumentChunks(BaseModel):
+    token: str
+    service_name: str
+    document_name: str
+    method: str = "chunk_per_page"
+    split_token: Optional[str] = ""
+    start_page: int = 1
+    end_page: int = 1
+
+    @validator('split_token', always=True)
+    def check_split_token(cls, v, values):
+        method = values.get('method')
+        if method == 'personalize_chunking' and not v:
+            raise ValueError('split_token is required when method is personalize_chunking')
+        return v
+
+class DocumentResponse(BaseModel):
+    token: str
+    service_name: str
+    document_name: str
+    method: str = "chunk_per_page"
+    model: str = "gpt-3.5-turbo"
+    # 'schema' shadows a BaseModel attribute in pydantic v1, so the field is
+    # aliased; clients still send the key as "schema".
+    json_schema: dict = Field(..., alias="schema")
+    comment: Optional[dict] = {}
+    split_token: Optional[str] = ""
+    start_page: int = 1
+    end_page: int = 1
+
+    @validator('split_token', always=True)
+    def check_split_token(cls, v, values):
+        method = values.get('method')
+        if method == 'personalize_chunking' and not v:
+            raise ValueError('split_token is required when method is personalize_chunking')
+        return v
@app.post("/add_services", status_code=status.HTTP_201_CREATED)
|
104 |
+
async def add_services(request: TokenServicesRequest):
|
105 |
+
"""
|
106 |
+
Adds a list of services to a given token.
|
107 |
+
|
108 |
+
This endpoint accepts a request with a token and a list of services, attempting to add each service to the specified token.
|
109 |
+
The service information must include all necessary details like name and description.
|
110 |
+
|
111 |
+
Parameters:
|
112 |
+
- request (TokenServicesRequest): A model containing the authorization token and a list of services to be added.
|
113 |
+
|
114 |
+
Returns:
|
115 |
+
- A list of dictionaries, each representing a successfully added service with its details.
|
116 |
+
|
117 |
+
Raises:
|
118 |
+
- HTTPException: 400 Bad Request if any value error occurs during processing, typically due to invalid input.
|
119 |
+
- HTTPException: 500 Internal Server Error if any unexpected errors occur during the process.
|
120 |
+
"""
|
121 |
+
try:
|
122 |
+
# Convert services to dicts
|
123 |
+
services_dicts = [service.dict() for service in request.services]
|
124 |
+
result = functions_doc.add_services_to_token(request.token, services_dicts)
|
125 |
+
return result
|
126 |
+
except ValueError as ve:
|
127 |
+
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(ve))
|
128 |
+
except Exception as e:
|
129 |
+
raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
|
130 |
+
|
131 |
+
|
132 |
+
|
133 |
+
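+# Example request body for /add_services (illustrative; the token value is a
+# hypothetical client token, not something defined in this commit):
+#   {"token": "demo-token", "services": [{"servicename": "invoices"}]}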
@app.delete("/remove_service/")
|
134 |
+
async def remove_service(request: ServiceRemovalRequest):
|
135 |
+
"""
|
136 |
+
Removes a specified service associated with a token.
|
137 |
+
|
138 |
+
This endpoint allows the removal of a service by its name from a list associated with a given token.
|
139 |
+
Before attempting to remove the service, it verifies that the token exists. If the token does not exist,
|
140 |
+
a 404 error is returned. If the service name does not exist under the token or cannot be removed,
|
141 |
+
a 400 error is raised with a specific message.
|
142 |
+
|
143 |
+
Parameters:
|
144 |
+
- request (ServiceRemovalRequest): A model containing the authorization token and the name of the service to be removed.
|
145 |
+
|
146 |
+
Returns:
|
147 |
+
- A dictionary with a success status and a message indicating the outcome of the operation.
|
148 |
+
|
149 |
+
Raises:
|
150 |
+
- HTTPException: 404 Not Found if the token does not exist.
|
151 |
+
- HTTPException: 400 Bad Request if the service cannot be removed or does not exist under the token.
|
152 |
+
- HTTPException: 500 Internal Server Error for any other unexpected errors.
|
153 |
+
"""
|
154 |
+
try:
|
155 |
+
# Check if the token exists in Redis
|
156 |
+
user_key = f"token:{request.token}:docservices"
|
157 |
+
if not r.exists(user_key):
|
158 |
+
raise HTTPException(status_code=404, detail="Token not found.")
|
159 |
+
# If checks pass, proceed to remove the service
|
160 |
+
manager_doc = functions_doc()
|
161 |
+
result = manager_doc.remove_service_by_name(token=request.token, servicename=request.servicename)
|
162 |
+
if result["success"]:
|
163 |
+
return {"success": True, "message": result["message"]}
|
164 |
+
else:
|
165 |
+
raise HTTPException(status_code=400, detail=result["message"])
|
166 |
+
except Exception as e:
|
167 |
+
raise HTTPException(status_code=500, detail=str(e))
|
168 |
+
|
169 |
+
|
170 |
+
@app.post("/add_and_store_document/", summary="Store a Document in Redis",
|
171 |
+
description="Stores a document as a base64 encoded string in Redis. The document is tagged with additional metadata and associated with a unique key.")
|
172 |
+
async def add_and_store_document(request:StoreDocumentServicesRequest):
|
173 |
+
"""
|
174 |
+
Stores a file document in Redis as a base64 encoded string with its corresponding tags and document names.
|
175 |
+
|
176 |
+
Args:
|
177 |
+
token (str): The unique identifier for the user.
|
178 |
+
service_name (str): The service under which the document will be stored.
|
179 |
+
document_name (str): The name of the document to be stored.
|
180 |
+
file (UploadFile): The file document to be stored as a base64 encoded string.
|
181 |
+
|
182 |
+
Returns:
|
183 |
+
JSONResponse: A JSON response indicating the status of the operation ("success" or "error") and a message describing the result or the error encountered.
|
184 |
+
|
185 |
+
Raises:
|
186 |
+
HTTPException: An HTTP exception is raised with a status code of 400 or 500 depending on the type of error during the storing process.
|
187 |
+
"""
|
188 |
+
try:
|
189 |
+
# Read file content as bytes
|
190 |
+
#encoded_file = await request.file.read()
|
191 |
+
|
192 |
+
# Store the document
|
193 |
+
# doc_manager = functions_doc()
|
194 |
+
response = functions_doc.add_and_store_document(token=request.token, service_name=request.service_name,document_name= request.document_name, encoded_file=request.file)
|
195 |
+
return JSONResponse(status_code=200, content={"status":"success", "message":response})
|
196 |
+
|
197 |
+
except redis.RedisError as e:
|
198 |
+
logging.error(f"Failed to store document: {e}")
|
199 |
+
return JSONResponse(status_code=500, content={"status": "error", "message": str(e)})
|
200 |
+
|
201 |
+
except Exception as e:
|
202 |
+
logging.error(f"An error occurred: {e}")
|
203 |
+
return JSONResponse(status_code=500, content={"status": "error", "message": "An unexpected error occurred"})
|
204 |
+
|
205 |
+
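+# Example request body for /add_and_store_document/ (illustrative; "file"
+# carries the document bytes base64-encoded as a string):
+#   {"token": "demo-token", "service_name": "invoices",
+#    "document_name": "inv-001.pdf", "file": "<base64-encoded PDF>"}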
@app.delete("/remove_documents/", summary="Remove Multiple Documents",
|
206 |
+
description="Removes multiple documents from both the Redis store and the specified service list under a given token.")
|
207 |
+
async def remove_documents(request: RemoveDocumentsRequest):
|
208 |
+
"""
|
209 |
+
Removes multiple documents from Redis storage and their references from a specified service list under a user's token.
|
210 |
+
|
211 |
+
Args:
|
212 |
+
request (RemoveDocumentsRequest): A Pydantic model that includes the token, the service name, and a list of document names to be removed.
|
213 |
+
|
214 |
+
Returns:
|
215 |
+
dict: A dictionary indicating the status of the operation ("success" or "error") and a message describing the result or error encountered.
|
216 |
+
|
217 |
+
Raises:
|
218 |
+
HTTPException: An HTTP exception is raised with status code 400 or 500, depending on the type of error during the document removal process.
|
219 |
+
"""
|
220 |
+
try:
|
221 |
+
manager_doc = functions_doc()
|
222 |
+
response = manager_doc.remove_documents_from_service(token = request.token, service_name = request.service_name, document_names = request.document_names)
|
223 |
+
return response
|
224 |
+
except Exception as e:
|
225 |
+
raise HTTPException(status_code=400, detail=str(e))
|
226 |
+
|
227 |
+
@app.get("/services/", response_model=list)
|
228 |
+
def retrieve_service_names(request: services) -> list:
|
229 |
+
"""
|
230 |
+
Endpoint to retrieve service names for a given token.
|
231 |
+
|
232 |
+
:param token: The unique token of the user.
|
233 |
+
:return: A list of service names associated with the token or an empty list if none are found.
|
234 |
+
"""
|
235 |
+
services_key = f"token:{request.token}:docservices"
|
236 |
+
try:
|
237 |
+
existing_services = r.lrange(services_key, 0, -1)
|
238 |
+
service_names = [json.loads(service)['servicename'] for service in existing_services]
|
239 |
+
return service_names if service_names else []
|
240 |
+
except Exception as e:
|
241 |
+
raise HTTPException(status_code=500, detail=f"Failed to fetch or parse services: {str(e)}")
|
242 |
+
|
243 |
+
@app.get("/documents/", response_model=dict)
|
244 |
+
def retrieve_documents(request:ServiceRemovalRequest) -> dict:
|
245 |
+
"""
|
246 |
+
Endpoint to retrieve document names from a specific service for a given token.
|
247 |
+
|
248 |
+
:param token: The unique token of the user.
|
249 |
+
:param servicename: The name of the service from which to retrieve documents.
|
250 |
+
:return: A dictionary containing a list of documents or a message if the service is not found.
|
251 |
+
"""
|
252 |
+
try:
|
253 |
+
manager_doc = functions_doc()
|
254 |
+
response = manager_doc.get_documents_from_service(token = request.token, servicename = request.servicename)
|
255 |
+
return response
|
256 |
+
except Exception as e:
|
257 |
+
raise HTTPException(status_code=400, detail=str(e))
|
258 |
+
|
259 |
+
@app.get("/get_document/", response_model= Optional[bytes])
|
260 |
+
def get_document(request: Document) -> dict:
|
261 |
+
"""
|
262 |
+
Retrieves a document stored as base64-encoded bytes from a specified service for a given token.
|
263 |
+
|
264 |
+
This endpoint is responsible for fetching a document by name from a specific service associated with a token.
|
265 |
+
The document is expected to be stored in base64-encoded byte format. The response will include the document
|
266 |
+
if available or return an appropriate error message if not found or in case of an error.
|
267 |
+
|
268 |
+
Parameters:
|
269 |
+
- request (Document): A model containing the authorization token, service name, and document name.
|
270 |
+
|
271 |
+
Returns:
|
272 |
+
- A JSON response containing the document in base64-encoded format or an error message.
|
273 |
+
|
274 |
+
Raises:
|
275 |
+
- HTTPException: 400 Bad Request if there's an error during the retrieval process.
|
276 |
+
"""
|
277 |
+
try:
|
278 |
+
manager_doc = functions_doc()
|
279 |
+
response = manager_doc.get_document(token=request.token, service_name=request.service_name, document_name=request.document_name)
|
280 |
+
return JSONResponse(status_code=200, content=response)
|
281 |
+
except Exception as e:
|
282 |
+
raise HTTPException(status_code=400, detail=str(e))
|
283 |
+
@app.get("/get_num_pages/", response_model= dict)
|
284 |
+
def get_num_pages(request: Document) -> dict:
|
285 |
+
"""
|
286 |
+
Retrieves the number of pages in a PDF document from a specified service for a given token.
|
287 |
+
|
288 |
+
This endpoint fetches a document stored as a base64-encoded string, decodes it, and counts the number of pages using PyPDF2.
|
289 |
+
|
290 |
+
Parameters:
|
291 |
+
- request (Document): A model containing the authorization token, service name, and document name.
|
292 |
+
|
293 |
+
Returns:
|
294 |
+
- A JSON response with the status, message including the number of pages, and the number of pages if successful.
|
295 |
+
|
296 |
+
Raises:
|
297 |
+
- HTTPException: 400 Bad Request if there's an error during the retrieval or processing of the document.
|
298 |
+
"""
|
299 |
+
try:
|
300 |
+
manager_doc = functions_doc()
|
301 |
+
response = manager_doc.get_document(token=request.token, service_name=request.service_name, document_name=request.document_name)
|
302 |
+
if response["status"]=="success":
|
303 |
+
decoded_file = base64.b64decode(response["document"])
|
304 |
+
|
305 |
+
# Use BytesIO to create a file-like object in memory from the decoded data
|
306 |
+
pdf_file_like = io.BytesIO(decoded_file)
|
307 |
+
|
308 |
+
# Use PyPDF2 to read the file-like object and count the pages
|
309 |
+
pdf_reader = PyPDF2.PdfReader(pdf_file_like)
|
310 |
+
number_of_pages = len(pdf_reader.pages)
|
311 |
+
return JSONResponse(status_code=200, content={"status": "success", "message": f"Document has {number_of_pages} pages.", "num_pages": number_of_pages})
|
312 |
+
return JSONResponse(status_code=200, content=response)
|
313 |
+
except Exception as e:
|
314 |
+
raise HTTPException(status_code=400, detail=str(e))
|
315 |
+
|
316 |
+
@app.get("/get_chunks/", response_model=dict)
|
317 |
+
def get_chunks(request: DocumentChunks) -> dict:
|
318 |
+
"""
|
319 |
+
Retrieves text chunks from a specified range of pages in a document according to the chunking method.
|
320 |
+
|
321 |
+
This endpoint decodes a stored document and processes it to extract text chunks from specified pages.
|
322 |
+
Users must specify a valid start and end page, and a chunking method. The method can be 'chunk_per_page'
|
323 |
+
for straightforward chunking by page, or 'personalize_chunking' which may use additional text parameters.
|
324 |
+
|
325 |
+
Parameters:
|
326 |
+
- request (DocumentChunks): A model containing the necessary details to fetch and chunk the document.
|
327 |
+
|
328 |
+
Returns:
|
329 |
+
- A dictionary response with the status, a message including the count of chunks, and the chunks themselves.
|
330 |
+
|
331 |
+
Raises:
|
332 |
+
- HTTPException: 400 Bad Request if there are parameter errors or processing fails.
|
333 |
+
"""
|
334 |
+
try:
|
335 |
+
manager_doc = functions_doc()
|
336 |
+
response = manager_doc.get_document(token=request.token, service_name=request.service_name, document_name=request.document_name)
|
337 |
+
if response["status"] == "success":
|
338 |
+
decoded_file = base64.b64decode(response["document"])
|
339 |
+
pdf_file_like = io.BytesIO(decoded_file)
|
340 |
+
pdf_reader = PyPDF2.PdfReader(pdf_file_like)
|
341 |
+
number_of_pages = len(pdf_reader.pages)
|
342 |
+
|
343 |
+
if request.start_page < 1 or request.end_page > number_of_pages or request.start_page > request.end_page:
|
344 |
+
raise HTTPException(status_code=400, detail="Invalid start_page or end_page.")
|
345 |
+
|
346 |
+
if request.method == "chunk_per_page":
|
347 |
+
chunks = manager_doc.extract_text_from_pdf(pdf_file_like, request.start_page, request.end_page)
|
348 |
+
return {"status": "success", "message": f"Document has {len(chunks)} chunk(s).", "chunks": chunks}
|
349 |
+
elif request.method == "personalize_chunking":
|
350 |
+
# Assuming you process personalized chunking here:
|
351 |
+
personalized_chunks = manager_doc.personalize_chunking(request.split_token, pdf_file_like, request.start_page, request.end_page)
|
352 |
+
return {"status": "success", "message": f"Document has {len(personalized_chunks)} personalized chunk(s).", "chunks": personalized_chunks}
|
353 |
+
else:
|
354 |
+
raise HTTPException(status_code=400, detail="Invalid method provided.")
|
355 |
+
return response
|
356 |
+
except Exception as e:
|
357 |
+
raise HTTPException(status_code=400, detail=str(e))
|
358 |
+
|
359 |
+
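+# Example request body for /get_chunks/ (illustrative values):
+#   {"token": "demo-token", "service_name": "invoices",
+#    "document_name": "inv-001.pdf", "method": "personalize_chunking",
+#    "split_token": "ARTICLE", "start_page": 1, "end_page": 3}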
@app.get("/structure_response/", response_model=dict)
|
360 |
+
def structure_response(request: DocumentRespons)-> dict:
|
361 |
+
"""
|
362 |
+
Retrieves and processes chunks of a document into structured JSON based on specific criteria.
|
363 |
+
|
364 |
+
This endpoint decodes a stored document and processes it to extract and transform text chunks
|
365 |
+
from specified pages into structured JSON format. Users must specify a valid start and end page,
|
366 |
+
and a chunking method. The method can be 'chunk_per_page' for straightforward chunking by page,
|
367 |
+
or 'personalize_chunking' which may use additional text parameters. The model parameter can take
|
368 |
+
values like "gpt-3.5-turbo" or "gemini" for processing the chunks. The processing method and output
|
369 |
+
schema are specified by the user.
|
370 |
+
|
371 |
+
Parameters:
|
372 |
+
- request (DocumentRespons): A model containing the details needed to fetch, chunk, and structure the document.
|
373 |
+
|
374 |
+
Returns:
|
375 |
+
- A dictionary response with the status and the structured JSON if successful.
|
376 |
+
|
377 |
+
Raises:
|
378 |
+
- HTTPException: 400 Bad Request for parameter errors or processing issues.
|
379 |
+
- HTTPException: 500 Internal Server Error for any other unexpected errors.
|
380 |
+
"""
|
381 |
+
|
382 |
+
try:
|
383 |
+
# Assuming functions_doc() returns an instance with necessary methods
|
384 |
+
manager_doc = functions_doc()
|
385 |
+
response = manager_doc.get_document(token=request.token, service_name=request.service_name, document_name=request.document_name)
|
386 |
+
if response["status"] == "success":
|
387 |
+
decoded_file = base64.b64decode(response["document"])
|
388 |
+
pdf_file_like = io.BytesIO(decoded_file)
|
389 |
+
pdf_reader = PyPDF2.PdfReader(pdf_file_like)
|
390 |
+
number_of_pages = len(pdf_reader.pages)
|
391 |
+
|
392 |
+
if request.start_page < 1 or request.end_page > number_of_pages or request.start_page > request.end_page:
|
393 |
+
raise HTTPException(status_code=400, detail="Invalid start_page or end_page.")
|
394 |
+
|
395 |
+
if request.method == "chunk_per_page":
|
396 |
+
chunks = manager_doc.extract_text_from_pdf(pdf_file_like, request.start_page, request.end_page)
|
397 |
+
json_list = process_chunks(chunks, manager_doc, request.schema, request.model,request.comment)
|
398 |
+
return {"status": "success", "json": json_list}
|
399 |
+
elif request.method == "personalize_chunking":
|
400 |
+
personalized_chunks = manager_doc.personalize_chunking(request.split_token, pdf_file_like, request.start_page, request.end_page)
|
401 |
+
json_list = process_chunks(personalized_chunks, manager_doc, request.schema, request.model,request.comment)
|
402 |
+
return {"status": "success", "json": json_list}
|
403 |
+
else:
|
404 |
+
raise HTTPException(status_code=400, detail="Invalid method provided.")
|
405 |
+
return response
|
406 |
+
except Exception as e:
|
407 |
+
raise HTTPException(status_code=500, detail=str(e))
|
408 |
+
|
409 |
+
+def process_chunks(chunks, manager_doc, schema, model, comment):
+    """Run each text chunk through the model and collect the structured results."""
+    json_list = []
+    for chunk in chunks:
+        try:
+            response = manager_doc.get_json(schema, chunk, model, comment)
+        except Exception as e:
+            logging.error(f"Failed to structure chunk: {e}")
+            response = {}
+        json_list.append(response)
+    return json_list
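
A minimal end-to-end client sketch for the API above (assumptions: the stack runs locally on port 80 as in docker-compose.yml; the token, service, and file names are hypothetical; and because the read endpoints are declared as GET yet expect JSON bodies, the body is sent explicitly via requests.request):

    import base64
    import requests

    BASE = "http://localhost:80"  # assumed local docker-compose deployment
    TOKEN = "demo-token"          # hypothetical client token

    # Register a service, then upload a base64-encoded PDF
    requests.post(f"{BASE}/add_services",
                  json={"token": TOKEN, "services": [{"servicename": "invoices"}]})
    with open("inv-001.pdf", "rb") as f:  # hypothetical local file
        encoded = base64.b64encode(f.read()).decode()
    requests.post(f"{BASE}/add_and_store_document/",
                  json={"token": TOKEN, "service_name": "invoices",
                        "document_name": "inv-001.pdf", "file": encoded})

    # Ask for structured extraction over page 1
    resp = requests.request("GET", f"{BASE}/structure_response/",
                            json={"token": TOKEN, "service_name": "invoices",
                                  "document_name": "inv-001.pdf",
                                  "method": "chunk_per_page",
                                  "model": "gpt-3.5-turbo",
                                  "schema": {"invoice_number": "", "total": ""},
                                  "start_page": 1, "end_page": 1})
    print(resp.json())
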
requirements.txt
ADDED
@@ -0,0 +1,12 @@
+fastapi
+uvicorn[standard]
+pydantic<2  # the models use v1-style validators
+requests
+redis
+numpy
+gevent
+PyPDF2
+pdfplumber
+openai
+google-generativeai==0.7.0
srs/utils.py
ADDED
@@ -0,0 +1,429 @@
+import json
+import logging
+import os
+import re
+
+import google.generativeai as genai
+import pdfplumber
+import redis
+from openai import OpenAI
+from typing import Dict, List, Optional
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger()
+
+r = redis.Redis(host='redis', port=6379, db=0, password=None, decode_responses=True)
+class functions_doc:
+    def __init__(self):
+        # API keys are read from the environment; keys must never be committed to source control.
+        self.client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
+        GENERATION_CONFIG = {
+            "temperature": 0.2,
+            "top_p": 0.75,
+            "max_output_tokens": 6000,
+        }
+        SAFETY_SETTINGS = [
+            {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
+            {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
+            {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
+            {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
+        ]
+
+        # Set up the Gemini model
+        genai.configure(api_key=os.environ["GOOGLE_API_KEY"])
+        self.model = genai.GenerativeModel(model_name="gemini-1.5-pro-latest",
+                                           generation_config=GENERATION_CONFIG,
+                                           safety_settings=SAFETY_SETTINGS)
+
+    @staticmethod
+    def add_services_to_token(token: str, services: List[Dict]) -> Dict:
+        """
+        Add multiple services to a token's list of services, skipping any service
+        whose name already exists.
+
+        :param token: The unique token of the user.
+        :param services: List of service dictionaries to add.
+        :return: A dictionary with a message and the list of added services.
+        """
+        services_key = f"token:{token}:docservices"
+        try:
+            existing_services = r.lrange(services_key, 0, -1)
+            existing_service_names = [json.loads(service)['servicename'] for service in existing_services]
+        except Exception as e:
+            raise Exception("Failed to fetch or parse existing services: " + str(e))
+
+        if not services or not isinstance(services, list):
+            raise ValueError("Invalid services format. It must be a list of services.")
+
+        added_services = []
+        for service in services:
+            if not isinstance(service, dict) or 'servicename' not in service:
+                continue
+
+            servicename = service.get('servicename')
+            if servicename in existing_service_names:
+                continue
+
+            service_info = json.dumps({"servicename": servicename, "documents": []})
+            try:
+                r.rpush(services_key, service_info)
+                added_services.append(servicename)
+            except Exception as e:
+                raise Exception(f"Failed to add service {servicename}: " + str(e))
+
+        if not added_services:
+            raise Exception("No new services were added. They may already exist or input was invalid.")
+
+        return {"message": "Services successfully added.", "added_services": added_services}
+
+    def remove_service_by_name(self, token, servicename):
+        """
+        Remove a service entry from Redis based on the service name,
+        together with all of its associated documents.
+
+        Parameters:
+            token (str): Token identifying the user data.
+            servicename (str): Name of the service to remove.
+
+        Returns:
+            dict: Status and message of the operation.
+        """
+        try:
+            user_key = f"token:{token}:docservices"
+            # Queue the list edits in a Redis pipeline
+            pipe = r.pipeline()
+            list_length = r.llen(user_key)
+
+            for i in range(list_length):
+                # Read each JSON entry from the list
+                service_data = r.lindex(user_key, i)
+                if service_data:
+                    data = json.loads(service_data)
+                    if data["servicename"] == servicename:
+                        # Remove all documents stored for this service
+                        documents = data["documents"]
+                        document_names = [doc['documentname'] for doc in documents]
+                        logging.info(f"Removing documents: {document_names}")
+                        self.remove_documents_from_service(token, servicename, document_names)
+                        # Remove the service entry itself from the list
+                        pipe.lrem(user_key, 0, service_data)
+
+            pipe.execute()
+            return {"success": True, "message": f"Service {servicename} and all associated documents removed."}
+        except Exception as e:
+            return {"success": False, "message": str(e)}
+
+    @staticmethod
+    def add_and_store_document(token: str, service_name: str, document_name: str, encoded_file: bytes) -> dict:
+        """
+        Add a document to a service under a user's token and store its content in Redis.
+        If the document name already exists in the service, it is neither appended nor stored again.
+
+        :param token: The unique token of the user.
+        :param service_name: The service to which the document will be added.
+        :param document_name: The name of the document to add.
+        :param encoded_file: The base64-encoded file to store.
+        :return: A dictionary with a message indicating the result.
+        """
+        services_key = f"token:{token}:docservices"
+        binary_key_key = f"token:{token}:{service_name}:binarykey"
+
+        try:
+            existing_services = r.lrange(services_key, 0, -1)
+            for i, service in enumerate(existing_services):
+                service_data = json.loads(service)
+                if service_data['servicename'] == service_name:
+                    documents = service_data.get('documents', [])
+
+                    if any(doc['documentname'] == document_name for doc in documents):
+                        return {"message": "Document already exists in the service."}
+
+                    # Auto-increment the binary key used to address the stored file
+                    binary_key = r.incr(binary_key_key)
+
+                    # Append the new document entry and write the list item back
+                    documents.append({'documentname': document_name, 'binarykey': str(binary_key)})
+                    service_data['documents'] = documents
+                    r.lset(services_key, i, json.dumps(service_data))
+
+                    # Store the document content itself
+                    r.set(service_name + "_" + str(binary_key), encoded_file)
+                    logging.info("Document stored successfully in Redis.")
+                    return {"message": "Document successfully added and stored in the service."}
+
+            return {"message": "Service not found."}
+
+        except redis.RedisError as e:
+            logging.error(f"Failed to store document: {e}")
+            return {"status": "error", "message": str(e)}
+        except Exception as e:
+            logging.error(f"An error occurred: {e}")
+            return {"status": "error", "message": "An unexpected error occurred"}
+
+    def personalize_chunking(self, real_text, pdf_path, start_page, end_page):
+        """Extract text from the page range and split it on the user-supplied token."""
+        text = ""
+        with pdfplumber.open(pdf_path) as pdf:
+            # Only iterate over the requested page range (1-based, inclusive)
+            for page_number in range(start_page - 1, end_page):
+                page = pdf.pages[page_number]
+                # extract_text may return None for pages without extractable text
+                text += page.extract_text(x_tolerance=2, y_tolerance=4) or ""
+        return text.split(real_text)
+
+    def extract_text_from_pdf(self, pdf_path, start_page, end_page):
+        """Extract one text chunk per page over the requested page range."""
+        chunks = []
+        with pdfplumber.open(pdf_path) as pdf:
+            for page_number in range(start_page - 1, end_page):
+                page = pdf.pages[page_number]
+                text = page.extract_text(x_tolerance=2, y_tolerance=4)
+                chunks.append(text or "")
+        return chunks
+
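+    # Illustrative behavior: if pages 1-2 contain "ARTICLE 1 ... ARTICLE 2 ...",
+    # personalize_chunking("ARTICLE", pdf, 1, 2) returns ['', ' 1 ... ', ' 2 ...'];
+    # str.split keeps a leading empty chunk when the text starts with the token.
+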
+    def get_document(self, token: str, service_name: str, document_name: str) -> Optional[dict]:
+        """
+        Retrieve a stored PDF from Redis based on the token, service_name, and document_name.
+        Each document is stored under a key derived from these parameters.
+        """
+        try:
+            # Look up the binary key recorded for this document
+            binary_key = self.get_binary_key(token=token, service_name=service_name, document_name=document_name)
+
+            # Retrieve the document content from Redis
+            stored_file = r.get(service_name + "_" + str(binary_key))
+
+            if stored_file is None:
+                logging.info("No document found for the specified key.")
+                return {"status": "error", "message": "No document found for the specified key"}
+            else:
+                logging.info("Document retrieved successfully from Redis.")
+                return {"status": "success", "message": "Document retrieved successfully from Redis.", "document": stored_file}
+
+        except redis.RedisError as e:
+            logging.error(f"Failed to retrieve document: {e}")
+            return {"status": "error", "message": f"Failed to retrieve document: {e}"}
+        except Exception as e:
+            logging.error(f"An error occurred: {e}")
+            return None
+
+    def get_documents_from_service(self, token: str, servicename: str) -> dict:
+        """
+        Retrieve the document entries of a specific service within a token's list of services.
+
+        :param token: The unique token of the user.
+        :param servicename: The name of the service whose documents are retrieved.
+        :return: A dictionary with the list of documents, or a message indicating the result.
+        """
+        services_key = f"token:{token}:docservices"
+        try:
+            existing_services = r.lrange(services_key, 0, -1)
+            for service in existing_services:
+                service_data = json.loads(service)
+                if service_data['servicename'] == servicename:
+                    documents = service_data.get('documents', [])
+                    return {"success": True, "documents": documents}
+            return {"message": "Service not found."}
+        except Exception as e:
+            raise Exception("Failed to fetch or parse services: " + str(e))
+
+    def get_binary_key(self, token: str, service_name: str, document_name: str):
+        """Return the stored binary key for a document, or None if it is not registered."""
+        result = self.get_documents_from_service(token=token, servicename=service_name)
+        docs = result.get("documents", [])
+        for doc in docs:
+            if doc['documentname'] == document_name:
+                return doc['binarykey']
+        return None
+
+    def remove_documents_from_service(self, token: str, service_name: str, document_names: List[str]) -> dict:
+        """
+        Remove multiple PDF documents from Redis and their references from a specific
+        service within a token's list of services.
+        """
+        try:
+            services_key = f"token:{token}:docservices"
+            existing_services = r.lrange(services_key, 0, -1)
+            updated = False
+            for i, service in enumerate(existing_services):
+                service_data = json.loads(service)
+                if service_data['servicename'] == service_name:
+                    documents = service_data.get('documents', [])
+                    new_documents = [doc for doc in documents if doc['documentname'] not in document_names]
+
+                    if len(documents) != len(new_documents):
+                        # Delete the stored file of each removed document
+                        for document_name in document_names:
+                            binary_key = self.get_binary_key(token=token, service_name=service_name, document_name=document_name)
+                            redis_key = service_name + "_" + str(binary_key)
+                            if r.exists(redis_key):
+                                r.delete(redis_key)
+                                logging.info(f"Document with key {redis_key} removed successfully from Redis.")
+
+                        # Write the pruned document list back to the service entry
+                        service_data['documents'] = new_documents
+                        r.lset(services_key, i, json.dumps(service_data))
+                        updated = True
+
+            if updated:
+                return {"status": "success", "message": "Documents removed successfully from both Redis storage and service list."}
+            else:
+                return {"status": "error", "message": "No documents found in the service list or no changes were made."}
+
+        except redis.RedisError as e:
+            logging.error(f"Failed to delete documents: {e}")
+            return {"status": "error", "message": str(e)}
+        except Exception as e:
+            logging.error(f"An error occurred: {e}")
+            return {"status": "error", "message": "An unexpected error occurred"}
+
+    def get_json(self, schema, context, model, comment):
+        """Ask the selected model to fill the schema from the context and return parsed JSON."""
+        prompt = "Your task is to extract information from context."
+        var = ""
+        if comment:
+            var = f"""**Explanation of keys in schema**: {comment}"""
+        instruction = f"""
+        **JSON Format (High Priority)**: Provide the output in a properly formatted JSON structure.
+        **Respect Schema (High Priority)**: Utilize the schema below to organize the extracted information from the context. If certain information is absent, leave the corresponding field empty.
+        **Error Handling**: If the context does not contain sufficient information to fulfill the requirements, return the following JSON response: {{"message": "Context lacks the desired information"}}.
+
+        ```json
+        {{
+        {schema}
+        }}```
+        {var}
+        """
+        template = f"""
+        {prompt}
+        Consider the following:
+        {instruction}
+
+        CONTEXT:
+        {context}
+        """
+        if model == "gpt-3.5-turbo":
+            response = self.client.chat.completions.create(
+                model="gpt-3.5-turbo",
+                messages=[{"role": "user", "content": template}])
+            pred_response = response.choices[0].message.content
+            return self.parse_json(pred_response)
+        elif model == "gemini":
+            response = self.model.generate_content(template)
+            pred_response = response.text
+            return self.parse_json(pred_response)
+        else:
+            raise ValueError(f"Unsupported model: {model}")
+
+    def clean_and_load_json(self, s):
+        """Best-effort cleanup of almost-JSON text before loading it."""
+        # Remove comments
+        s = re.sub(r'#.*?\n', '', s)
+        # Remove trailing commas before closing brackets in lists and dictionaries
+        s = re.sub(r',\s*\n\s*(\]|\})', r'\1', s)
+        # Remove ellipsis placeholders like '\n ...\n'
+        s = re.sub(r'\n\s*\.\.\.\n', '', s)
+        # Remove a comma directly before } or ]
+        s = re.sub(r',\s*(\]|\})', r'\1', s)
+        # Strip surrounding whitespace and load the cleaned JSON string
+        s = s.strip()
+        return json.loads(s)
+
+    def parse_json(self, s):
+        """Parse a model response as JSON, falling back to the outermost {...} block."""
+        try:
+            json_str = json.loads(s)
+        except json.JSONDecodeError:
+            # Locate the first '{' and the last '}' in the response
+            start_idx = s.find('{')
+            end_idx = s.rfind('}')
+            # If either delimiter is missing, give up
+            if start_idx == -1 or end_idx == -1:
+                raise ValueError("Could not find JSON object in the provided string.")
+            # Extract the JSON substring, including the closing brace
+            json_str = s[start_idx:end_idx + 1]
+            try:
+                json_str = json.loads(json_str)
+            except json.JSONDecodeError:
+                json_str = self.clean_and_load_json(json_str)
+        return json_str
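
A small self-contained illustration of the parse_json fallback path (the model output below is made up; __new__ skips __init__ so no API keys are required):

    from srs.utils import functions_doc

    messy = 'Sure! Here is the JSON:\n{\n  "invoice_number": "A-17",\n  "total": "99.50",\n}'
    fd = functions_doc.__new__(functions_doc)  # bypass __init__ (no API clients needed)
    print(fd.parse_json(messy))  # -> {'invoice_number': 'A-17', 'total': '99.50'}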