kgauvin603 committed on
Commit
74eb4c5
·
verified ·
1 Parent(s): 56b1c53

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -10
app.py CHANGED
@@ -92,16 +92,47 @@ import requests
92
 
93
  print(f"Pass 2")
94
 
 
 
 
 
 
 
95
  # Command to unzip the file
96
- command = "unzip kgauvin603/10k-reports/Dataset-10k.zip -d dataset"
97
  # Execute the command
98
- try:
99
- subprocess.run(command, check=True, shell=True)
100
- except subprocess.CalledProcessError as e:
101
- print(f"An error occurred: {e}")
102
-
 
 
 
103
  repo_id = "kgauvin603/10k-reports"
104
- file_path = "dataset"
105
- # Get the URL for the file in the repository
106
- file_url = f"https://huggingface.co/{repo_id}/resolve/main/{file_path}"
107
- print(file_url)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
 
93
  print(f"Pass 2")
94
 
95
+
96
+ #repo_id = "kgauvin603/10k-reports"
97
+ #file_path = "dataset"
98
+ # Get the URL for the file in the repository
99
+ #file_url = f"https://huggingface.co/{repo_id}/resolve/main/{file_path}"
100
+ #print(file_url)
101
  # Command to unzip the file
102
+ #command = "unzip kgauvin603/10k-reports/Dataset-10k.zip -d dataset"
103
  # Execute the command
104
+ #try:
105
+ # subprocess.run(command, check=True, shell=True)
106
+ #except subprocess.CalledProcessError as e:
107
+ # print(f"An error occurred: {e}")
108
+
109
+ #https://huggingface.co/datasets/kgauvin603/10k-reports
110
+
111
+ # Define the repository and file path
112
  repo_id = "kgauvin603/10k-reports"
113
+ file_path = "Dataset-10k.zip"
114
+
115
+ # Construct the URL for the file in the repository
116
+ file_url = f"https://huggingface.co/datasets/{repo_id}/resolve/main/{file_path}"
117
+ print(f"File URL: {file_url}")
118
+
119
+ # Download the zip file
120
+ response = requests.get(file_url)
121
+ response.raise_for_status() # Ensure the request was successful
122
+
123
+ # Unzip the file in memory
124
+ with zipfile.ZipFile(io.BytesIO(response.content)) as zip_ref:
125
+ # List the files in the zip archive
126
+ zip_file_list = zip_ref.namelist()
127
+ print(f"Files in the zip archive: {zip_file_list}")
128
+
129
+ # Extract specific files or work with them directly in memory
130
+ for file_name in zip_file_list:
131
+ with zip_ref.open(file_name) as file:
132
+ content = file.read()
133
+ print(f"Content of {file_name}: {content[:100]}...") # Print the first 100 characters of each file
134
+
135
+ # If you need to save the extracted files to disk, you can do so as follows:
136
+ # Define the extraction path
137
+ extraction_path = "./dataset"
138
+ import os