# Bootstrap script: installs the pinned runtime dependencies with pip, imports
# them, creates the embedding model and builds the URL of the dataset file in
# the Hugging Face repository.

# Standard-library imports (safe to load before anything is installed)
import json
import os
import subprocess
import sys
import uuid
import zipfile
from pathlib import Path


# Function to install a package using pip
def install(package):
    """Install *package* with pip, using the interpreter running this script.

    Args:
        package: A pip requirement specifier, e.g. ``"openai==1.23.2"``.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    # Going through ``sys.executable -m pip`` targets the environment of the
    # running interpreter rather than whichever ``pip`` is first on PATH.
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])


# Requirement specifiers that are installed before the third-party imports
# below are attempted (order preserved from the original call sequence).
_REQUIRED_PACKAGES = (
    "gradio",
    "openai==1.23.2",
    "tiktoken==0.6.0",
    "pypdf==4.0.1",
    "langchain==0.1.1",
    "langchain-community==0.0.13",
    "chromadb==0.4.22",
    "sentence-transformers==2.3.1",
)

# Install required packages. The first failing install is reported and the
# remaining packages are skipped, exactly as a single try-block of sequential
# install() calls would behave.
try:
    for _package in _REQUIRED_PACKAGES:
        install(_package)
except subprocess.CalledProcessError as e:
    print(f"An error occurred: {e}")

# Third-party imports (must come after the installation step above)
import gradio as gr
import pandas as pd
from openai import OpenAI
from huggingface_hub import HfApi
from huggingface_hub import CommitScheduler
from huggingface_hub import hf_hub_download

# Define your repository and file path
repo_id = "kgauvin603/rag-10k"
#file_path = "dataset.zip"

# Download the file
#downloaded_file = hf_hub_download(repo_id, file_path)

# Print the path to the downloaded file
#print(f"Downloaded file is located at: {downloaded_file}")

from langchain_community.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings
)
from langchain_community.vectorstores import Chroma

#from google.colab import userdata, drive
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

import tiktoken

# Progress marker printed once the tiktoken import has succeeded
print("Passed import of tiktoken")

# Define the embedding model (the vector store is not created in this part of
# the script)
embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-large')

# If dataset directory exists, remove it and all of the contents within
#if os.path.exists('dataset'):
#    !rm -rf dataset

# If collection_db exists, remove it and all of the contents within
#if os.path.exists('collection_db'):
#    !rm -rf dataset

#Mount the Google Drive
#drive.mount('/content/drive')

#Upload Dataset-10k.zip and unzip it dataset folder using -d option
#!unzip Dataset-10k.zip -d dataset

# Command to unzip the file (disabled; the active command installs extra
# packages instead)
#command = "unzip kgauvin603/rag-10k-analysis/Dataset-10k.zip -d dataset"
command = "pip install transformers huggingface_hub requests"

# Execute the command. The command is a fixed literal, so running it through
# the shell does not expose any externally supplied input.
try:
    subprocess.run(command, check=True, shell=True)
except subprocess.CalledProcessError as e:
    print(f"An error occurred: {e}")

# Imported after the command above, which installs it
import requests

# Provide the repository ID and the path of the file inside the repository.
# repo_id = "kgauvin603/rag-10k"  (already assigned above with this value)
file_path = "dataset"

# Get the URL for the file in the repository
file_url = f"https://huggingface.co/{repo_id}/resolve/main/{file_path}"