Namanj46 committed on
Commit
bea313d
·
1 Parent(s): ef33567

AWS S3 Integration

Browse files
.ipynb_checkpoints/app-checkpoint.py CHANGED
@@ -2,20 +2,39 @@ import os
2
  import openai
3
  import gradio as gr
4
  import pdfplumber
 
5
  from llama_index.core import Document, VectorStoreIndex, Settings
6
  from llama_index.llms.openai import OpenAI
7
  from llama_index.embeddings.openai import OpenAIEmbedding
8
  from llama_index.core.postprocessor import MetadataReplacementPostProcessor
9
  from llama_index.core.node_parser import SentenceWindowNodeParser
10
 
 
 
 
11
  # Set your OpenAI API key here
12
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
13
  openai.api_key = OPENAI_API_KEY
14
 
15
- # Get the current working directory and join with 'resumes'
 
 
 
 
16
  resume_path = 'resumes'
17
- if not os.path.exists(resume_path):
18
- raise ValueError(f"Directory 'resumes' not found")
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  # Function to load PDFs using pdfplumber
21
  def load_pdfs_with_pdfplumber(directory):
 
2
  import openai
3
  import gradio as gr
4
  import pdfplumber
5
+ import boto3
6
  from llama_index.core import Document, VectorStoreIndex, Settings
7
  from llama_index.llms.openai import OpenAI
8
  from llama_index.embeddings.openai import OpenAIEmbedding
9
  from llama_index.core.postprocessor import MetadataReplacementPostProcessor
10
  from llama_index.core.node_parser import SentenceWindowNodeParser
11
 
12
+ from dotenv import load_dotenv
13
+ load_dotenv("config.env")
14
+
15
  # Set your OpenAI API key here
16
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
17
  openai.api_key = OPENAI_API_KEY
18
 
19
+ # AWS S3 setup
20
+ s3_bucket_name = "sagemaker-studio-gm4vm5dimae"
21
+ s3_client = boto3.client('s3')
22
+
23
+ # Directory to store downloaded PDFs
24
  resume_path = 'resumes'
25
+ os.makedirs(resume_path, exist_ok=True)
26
+
27
+ # Function to download PDFs from S3
28
def download_pdfs_from_s3(bucket_name, local_path):
    """Download every object from the S3 bucket *bucket_name* into *local_path*.

    Uses a paginator so buckets holding more than 1000 objects (the
    list_objects_v2 page limit) are listed completely, creates any
    intermediate directories implied by '/'-separated keys, and skips
    zero-byte "folder" placeholder keys that end in '/'.

    Relies on the module-level `s3_client` (boto3 S3 client).
    """
    paginator = s3_client.get_paginator('list_objects_v2')
    for page in paginator.paginate(Bucket=bucket_name):
        for obj in page.get('Contents', []):
            file_name = obj['Key']
            if file_name.endswith('/'):
                # S3 console "folder" placeholder — nothing to download.
                continue
            local_file_path = os.path.join(local_path, file_name)
            # Keys may contain '/' separators; ensure the target directory exists.
            os.makedirs(os.path.dirname(local_file_path) or '.', exist_ok=True)
            s3_client.download_file(bucket_name, file_name, local_file_path)
            print(f"Downloaded {file_name} to {local_file_path}")
35
+
36
+ # Download PDFs
37
+ download_pdfs_from_s3(s3_bucket_name, resume_path)
38
 
39
  # Function to load PDFs using pdfplumber
40
  def load_pdfs_with_pdfplumber(directory):
.ipynb_checkpoints/requirements-checkpoint.txt CHANGED
@@ -1 +1,6 @@
1
- huggingface_hub==0.22.2
 
 
 
 
 
 
1
+ gradio
2
+ pdfplumber
3
+ openai
4
+ llama_index
5
+ boto3
6
+
app.py CHANGED
@@ -2,6 +2,7 @@ import os
2
  import openai
3
  import gradio as gr
4
  import pdfplumber
 
5
  from llama_index.core import Document, VectorStoreIndex, Settings
6
  from llama_index.llms.openai import OpenAI
7
  from llama_index.embeddings.openai import OpenAIEmbedding
@@ -15,10 +16,25 @@ load_dotenv("config.env")
15
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
16
  openai.api_key = OPENAI_API_KEY
17
 
18
- # Get the current working directory and join with 'resumes'
 
 
 
 
19
  resume_path = 'resumes'
20
- if not os.path.exists(resume_path):
21
- raise ValueError(f"Directory 'resumes' not found")
 
 
 
 
 
 
 
 
 
 
 
22
 
23
  # Function to load PDFs using pdfplumber
24
  def load_pdfs_with_pdfplumber(directory):
 
2
  import openai
3
  import gradio as gr
4
  import pdfplumber
5
+ import boto3
6
  from llama_index.core import Document, VectorStoreIndex, Settings
7
  from llama_index.llms.openai import OpenAI
8
  from llama_index.embeddings.openai import OpenAIEmbedding
 
16
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
17
  openai.api_key = OPENAI_API_KEY
18
 
19
+ # AWS S3 setup
20
+ s3_bucket_name = "sagemaker-studio-gm4vm5dimae"
21
+ s3_client = boto3.client('s3')
22
+
23
+ # Directory to store downloaded PDFs
24
  resume_path = 'resumes'
25
+ os.makedirs(resume_path, exist_ok=True)
26
+
27
+ # Function to download PDFs from S3
28
def download_pdfs_from_s3(bucket_name, local_path):
    """Download every object from the S3 bucket *bucket_name* into *local_path*.

    Uses a paginator so buckets holding more than 1000 objects (the
    list_objects_v2 page limit) are listed completely, creates any
    intermediate directories implied by '/'-separated keys, and skips
    zero-byte "folder" placeholder keys that end in '/'.

    Relies on the module-level `s3_client` (boto3 S3 client).
    """
    paginator = s3_client.get_paginator('list_objects_v2')
    for page in paginator.paginate(Bucket=bucket_name):
        for obj in page.get('Contents', []):
            file_name = obj['Key']
            if file_name.endswith('/'):
                # S3 console "folder" placeholder — nothing to download.
                continue
            local_file_path = os.path.join(local_path, file_name)
            # Keys may contain '/' separators; ensure the target directory exists.
            os.makedirs(os.path.dirname(local_file_path) or '.', exist_ok=True)
            s3_client.download_file(bucket_name, file_name, local_file_path)
            print(f"Downloaded {file_name} to {local_file_path}")
35
+
36
+ # Download PDFs
37
+ download_pdfs_from_s3(s3_bucket_name, resume_path)
38
 
39
  # Function to load PDFs using pdfplumber
40
  def load_pdfs_with_pdfplumber(directory):
requirements.txt CHANGED
@@ -2,3 +2,5 @@ gradio
2
  pdfplumber
3
  openai
4
  llama_index
 
 
 
2
  pdfplumber
3
  openai
4
  llama_index
5
+ boto3
6
+