Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -83,29 +83,6 @@ def load_and_concat_data():
|
|
83 |
filtered_df = filtered_df.drop_duplicates().dropna(subset=['date_posted'])
|
84 |
|
85 |
return filtered_df
|
86 |
-
|
87 |
-
# Function to download the merged dataset as an in-memory CSV buffer.
def download_data():
    """Download every CSV file from the HF dataset repo and merge them.

    Lists the files in the ``{HF_USERNAME}/{DATASET_NAME}`` dataset repo,
    downloads each ``.csv`` file, concatenates them into one DataFrame,
    and serializes the result to an in-memory CSV buffer.

    Returns:
        io.StringIO: merged CSV content, seeked back to position 0 so it
        can be read immediately (e.g. by ``st.download_button``), or
        ``None`` when no CSV file could be downloaded/parsed.
    """
    api = HfApi()
    dataset_files = api.list_repo_files(repo_id=f"{HF_USERNAME}/{DATASET_NAME}", repo_type="dataset")
    csv_files = [file for file in dataset_files if file.endswith('.csv')]

    all_data = []
    for file in csv_files:
        try:
            file_content = api.hf_hub_download(repo_id=f"{HF_USERNAME}/{DATASET_NAME}", filename=file, repo_type="dataset", token=HF_TOKEN)
            df = pd.read_csv(file_content, engine='pyarrow')
            all_data.append(df)
        except Exception:
            pass  # Deliberate best-effort: skip files that can't be downloaded or parsed

    if not all_data:
        return None
    # BUG FIX: the original line was `merged-df=pd.concat(all_data,ignore_index=True)`,
    # a syntax error (subtraction used as an assignment target) that also left
    # `merged_df` undefined where it is used two lines below.
    merged_df = pd.concat(all_data, ignore_index=True)
    csv_buffer = io.StringIO()
    merged_df.to_csv(csv_buffer, index=False)
    csv_buffer.seek(0)
    return csv_buffer
|
109 |
|
110 |
@st.cache_data()
|
111 |
def get_unique_values(df):
|
|
|
83 |
filtered_df = filtered_df.drop_duplicates().dropna(subset=['date_posted'])
|
84 |
|
85 |
return filtered_df
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
|
87 |
@st.cache_data()
|
88 |
def get_unique_values(df):
|