Spaces:

kgauvin603
/

rag-10k-analysis

Sleeping

App Files Files Community

kgauvin603 committed on Jun 24, 2024

Commit

74eb4c5

verified ·

1 Parent(s): 56b1c53

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -10

app.py CHANGED Viewed

@@ -92,16 +92,47 @@ import requests
 print(f"Pass 2")
 # Command to unzip the file
-command = "unzip kgauvin603/10k-reports/Dataset-10k.zip -d dataset"
 # Execute the command
-try:
-    subprocess.run(command, check=True, shell=True)
-except subprocess.CalledProcessError as e:
-    print(f"An error occurred: {e}")
 repo_id = "kgauvin603/10k-reports"
-file_path = "dataset"
-# Get the URL for the file in the repository
-file_url = f"https://huggingface.co/{repo_id}/resolve/main/{file_path}"
-print(file_url)

 print(f"Pass 2")
+#repo_id = "kgauvin603/10k-reports"
+#file_path = "dataset"
+# Get the URL for the file in the repository
+#file_url = f"https://huggingface.co/{repo_id}/resolve/main/{file_path}"
+#print(file_url)
 # Command to unzip the file
+#command = "unzip kgauvin603/10k-reports/Dataset-10k.zip -d dataset"
 # Execute the command
+#try:
+#    subprocess.run(command, check=True, shell=True)
+#except subprocess.CalledProcessError as e:
+#    print(f"An error occurred: {e}")
+#https://huggingface.co/datasets/kgauvin603/10k-reports
+# Define the repository and file path
 repo_id = "kgauvin603/10k-reports"
+file_path = "Dataset-10k.zip"
+# Construct the URL for the file in the repository
+file_url = f"https://huggingface.co/datasets/{repo_id}/resolve/main/{file_path}"
+print(f"File URL: {file_url}")
+# Download the zip file
+response = requests.get(file_url)
+response.raise_for_status()  # Ensure the request was successful
+# Unzip the file in memory
+with zipfile.ZipFile(io.BytesIO(response.content)) as zip_ref:
+    # List the files in the zip archive
+    zip_file_list = zip_ref.namelist()
+    print(f"Files in the zip archive: {zip_file_list}")
+    # Extract specific files or work with them directly in memory
+    for file_name in zip_file_list:
+        with zip_ref.open(file_name) as file:
+            content = file.read()
+            print(f"Content of {file_name}: {content[:100]}...")  # Print the first 100 bytes of each file (content is raw bytes, not decoded text)
+# If you need to save the extracted files to disk, you can do so as follows:
+# Define the extraction path
+extraction_path = "./dataset"
+import os