Niharmahesh committed
Commit ba4caa1 · verified · 1 Parent(s): a4dd7d2

Update app.py

Files changed (1)
  1. app.py +50 -14
app.py CHANGED
@@ -8,32 +8,68 @@ HF_TOKEN = st.secrets["HF_TOKEN"]
 HF_USERNAME = st.secrets["HF_USERNAME"]
 DATASET_NAME = "jobeasz"

-def list_and_preview_csv_files():
+@st.cache_data(ttl=3600)  # Cache for 1 hour
+def load_and_concat_data():
     api = HfApi()
     dataset_files = api.list_repo_files(repo_id=f"{HF_USERNAME}/{DATASET_NAME}", repo_type="dataset")
     csv_files = [file for file in dataset_files if file.endswith('.csv')]

-    st.write(f"Total CSV files found: {len(csv_files)}")
-
+    all_data = []
     for file in csv_files:
-        st.subheader(f"File: {file}")
         try:
             file_content = api.hf_hub_download(repo_id=f"{HF_USERNAME}/{DATASET_NAME}", filename=file, repo_type="dataset", token=HF_TOKEN)
             df = pd.read_csv(file_content)
-            st.write(f"Shape: {df.shape}")
-            st.write("First 5 rows:")
-            st.dataframe(df.head())
-        except pd.errors.EmptyDataError:
-            st.warning(f"File {file} is empty or contains no data.")
+            all_data.append(df)
         except Exception as e:
-            st.error(f"Error reading file {file}: {str(e)}")
-        st.write("---")
+            st.warning(f"Error reading file {file}: {str(e)}")
+
+    if not all_data:
+        st.error("No valid data found in any of the CSV files.")
+        return pd.DataFrame()
+
+    concatenated_df = pd.concat(all_data, ignore_index=True)
+
+    # Filter columns
+    columns_to_keep = [
+        'site', 'job_url', 'title', 'company', 'location',
+        'job_type', 'date_posted', 'is_remote', 'description', 'company_url'
+    ]
+    filtered_df = concatenated_df[columns_to_keep].reset_index(drop=True)
+
+    # Ensure 'date_posted' is in datetime format
+    filtered_df['date_posted'] = pd.to_datetime(filtered_df['date_posted'], errors='coerce')
+
+    return filtered_df

 def main():
-    st.title("Hugging Face Dataset CSV Files Preview")
+    st.title("Concatenated Job Listings Data")
+
+    if st.button("Load and Preview Concatenated Data"):
+        with st.spinner("Loading and concatenating data..."):
+            df = load_and_concat_data()
+
+        if not df.empty:
+            st.success(f"Successfully loaded and concatenated data. Total rows: {len(df)}")
+
+            st.subheader("Data Preview")
+            st.dataframe(df.head())
+
+            st.subheader("Dataset Statistics")
+            st.write(f"Total job listings: {len(df)}")
+            st.write(f"Unique companies: {df['company'].nunique()}")
+            st.write(f"Unique locations: {df['location'].nunique()}")
+            st.write(f"Date range: {df['date_posted'].min()} to {df['date_posted'].max()}")

-    if st.button("List and Preview CSV Files"):
-        list_and_preview_csv_files()
+            # Allow user to download the concatenated dataset
+            csv = df.to_csv(index=False)
+            st.download_button(
+                label="Download concatenated dataset as CSV",
+                data=csv,
+                file_name="concatenated_job_listings.csv",
+                mime="text/csv",
+            )
+        else:
+            st.error("No data available to display.")

 if __name__ == "__main__":
     main()
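
The core of this change is the new data-shaping path inside load_and_concat_data(): the per-file DataFrames are concatenated, trimmed to a fixed column list, and date_posted is coerced to datetime. The sketch below is not part of the commit; it is a minimal, Streamlit-free illustration of those three steps using two fabricated mini-DataFrames (all sample values are invented; only the column list is copied from columns_to_keep in the diff).

# Standalone sketch (not from the commit): replays the concat -> column-filter ->
# datetime-coercion steps of load_and_concat_data() on made-up data, so the
# shaping logic can be checked without Streamlit, a Hub token, or any CSVs.
import pandas as pd

columns_to_keep = [
    'site', 'job_url', 'title', 'company', 'location',
    'job_type', 'date_posted', 'is_remote', 'description', 'company_url'
]

# Two fabricated frames standing in for DataFrames read from downloaded CSVs.
df_a = pd.DataFrame({
    'site': ['indeed'], 'job_url': ['https://example.com/a'], 'title': ['Data Analyst'],
    'company': ['Acme'], 'location': ['Remote'], 'job_type': ['fulltime'],
    'date_posted': ['2024-07-01'], 'is_remote': [True],
    'description': ['...'], 'company_url': ['https://example.com'],
    'extra_column': ['dropped by the filter'],   # not in columns_to_keep
})
df_b = pd.DataFrame({
    'site': ['linkedin'], 'job_url': ['https://example.com/b'], 'title': ['ML Engineer'],
    'company': ['Globex'], 'location': ['NYC'], 'job_type': ['contract'],
    'date_posted': ['not a date'], 'is_remote': [False],
    'description': ['...'], 'company_url': ['https://example.com'],
})

all_data = [df_a, df_b]
concatenated_df = pd.concat(all_data, ignore_index=True)
filtered_df = concatenated_df[columns_to_keep].reset_index(drop=True)
# errors='coerce' turns unparseable dates into NaT instead of raising.
filtered_df['date_posted'] = pd.to_datetime(filtered_df['date_posted'], errors='coerce')

print(filtered_df.shape)           # (2, 10) -- extra_column is gone
print(filtered_df['date_posted'])  # second row becomes NaT

The Streamlit-specific pieces of the new code (st.cache_data(ttl=3600), st.warning, the download button) are deliberately omitted here, since they only affect caching and presentation, not the shaping logic.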