Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -92,16 +92,47 @@ import requests
|
|
92 |
|
93 |
print(f"Pass 2")
|
94 |
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
# Command to unzip the file
|
96 |
-
command = "unzip kgauvin603/10k-reports/Dataset-10k.zip -d dataset"
|
97 |
# Execute the command
|
98 |
-
try:
|
99 |
-
subprocess.run(command, check=True, shell=True)
|
100 |
-
except subprocess.CalledProcessError as e:
|
101 |
-
print(f"An error occurred: {e}")
|
102 |
-
|
|
|
|
|
|
|
103 |
repo_id = "kgauvin603/10k-reports"
|
104 |
-
file_path = "
|
105 |
-
|
106 |
-
|
107 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
|
93 |
print(f"Pass 2")
|
94 |
|
95 |
+
|
96 |
+
#repo_id = "kgauvin603/10k-reports"
|
97 |
+
#file_path = "dataset"
|
98 |
+
# Get the URL for the file in the repository
|
99 |
+
#file_url = f"https://huggingface.co/{repo_id}/resolve/main/{file_path}"
|
100 |
+
#print(file_url)
|
101 |
# Command to unzip the file
|
102 |
+
#command = "unzip kgauvin603/10k-reports/Dataset-10k.zip -d dataset"
|
103 |
# Execute the command
|
104 |
+
#try:
|
105 |
+
# subprocess.run(command, check=True, shell=True)
|
106 |
+
#except subprocess.CalledProcessError as e:
|
107 |
+
# print(f"An error occurred: {e}")
|
108 |
+
|
109 |
+
#https://huggingface.co/datasets/kgauvin603/10k-reports
|
110 |
+
|
111 |
+
# Define the repository and file path
|
112 |
repo_id = "kgauvin603/10k-reports"
|
113 |
+
file_path = "Dataset-10k.zip"
|
114 |
+
|
115 |
+
# Construct the URL for the file in the repository
|
116 |
+
file_url = f"https://huggingface.co/datasets/{repo_id}/resolve/main/{file_path}"
|
117 |
+
print(f"File URL: {file_url}")
|
118 |
+
|
119 |
+
# Download the zip file
|
120 |
+
response = requests.get(file_url)
|
121 |
+
response.raise_for_status() # Ensure the request was successful
|
122 |
+
|
123 |
+
# Unzip the file in memory
|
124 |
+
with zipfile.ZipFile(io.BytesIO(response.content)) as zip_ref:
|
125 |
+
# List the files in the zip archive
|
126 |
+
zip_file_list = zip_ref.namelist()
|
127 |
+
print(f"Files in the zip archive: {zip_file_list}")
|
128 |
+
|
129 |
+
# Extract specific files or work with them directly in memory
|
130 |
+
for file_name in zip_file_list:
|
131 |
+
with zip_ref.open(file_name) as file:
|
132 |
+
content = file.read()
|
133 |
+
print(f"Content of {file_name}: {content[:100]}...") # Print the first 100 bytes of each file
|
134 |
+
|
135 |
+
# If you need to save the extracted files to disk, you can do so as follows:
|
136 |
+
# Define the extraction path
|
137 |
+
extraction_path = "./dataset"
|
138 |
+
import os
|