# Import the necessary libraries
import subprocess
import sys

# Function to install a package using pip
def install(package):
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])

# Install required packages
try:
    install("gradio")
    install("openai==1.23.2")
    install("tiktoken==0.6.0")
    install("pypdf==4.0.1")
    install("langchain==0.1.1")
    install("langchain-community==0.0.13")
    install("chromadb==0.4.22")
    install("sentence-transformers==2.3.1")
except subprocess.CalledProcessError as e:
    print(f"An error occurred: {e}")
import gradio as gr
import os
import uuid
import json
import pandas as pd
from openai import OpenAI
from huggingface_hub import HfApi
from huggingface_hub import CommitScheduler
from huggingface_hub import hf_hub_download
import zipfile
# Define your repository and file path
repo_id = "kgauvin603/rag-10k"
#file_path = "dataset.zip"

# Download the file
#downloaded_file = hf_hub_download(repo_id, file_path)

# Print the path to the downloaded file
#print(f"Downloaded file is located at: {downloaded_file}")
from langchain_community.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings
)
from langchain_community.vectorstores import Chroma
#from google.colab import userdata, drive
from pathlib import Path
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import tiktoken
print(f"Passed import of tiktoken" | |

# Define the embedding model and the vectorstore
embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-large')
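# Editor's sketch (illustrative, not the original pipeline): the imports above suggest
# that the PDFs in the dataset folder are loaded, chunked, and indexed into Chroma with
# this embedding model. The directory names, chunk sizes, and collection name below are
# assumptions; the helper is defined for reference and never called here.
def _build_vectorstore_sketch(pdf_dir="dataset", persist_dir="collection_db"):
    # Load every PDF in the directory into LangChain documents
    documents = PyPDFDirectoryLoader(pdf_dir).load()
    # Split the documents into overlapping chunks for retrieval
    splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=128)
    chunks = splitter.split_documents(documents)
    # Embed the chunks and persist them in a Chroma collection
    return Chroma.from_documents(
        documents=chunks,
        embedding=embedding_model,
        collection_name="rag_10k_collection",
        persist_directory=persist_dir,
    )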

# If the dataset directory exists, remove it and all of the contents within
#if os.path.exists('dataset'):
#    !rm -rf dataset

# If collection_db exists, remove it and all of the contents within
#if os.path.exists('collection_db'):
#    !rm -rf collection_db

# Mount the Google Drive
#drive.mount('/content/drive')

# Upload Dataset-10k.zip and unzip it into the dataset folder using the -d option
#!unzip Dataset-10k.zip -d dataset

# Command to unzip the file
#command = "unzip kgauvin603/rag-10k-analysis/Dataset-10k.zip -d dataset"
command = "pip install transformers huggingface_hub requests"

# Execute the command
try:
    subprocess.run(command, check=True, shell=True)
except subprocess.CalledProcessError as e:
    print(f"An error occurred: {e}")

import requests

# Provide the repository ID and file path
repo_id = "kgauvin603/rag-10k"
file_path = "dataset"

# Get the URL for the file in the repository
file_url = f"https://huggingface.co/{repo_id}/resolve/main/{file_path}"