job

Sleeping

App Files Files Community

redfernstech commited on Oct 29, 2024

Commit

4aefa71

verified ·

1 Parent(s): ebb45b9

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -230

app.py CHANGED Viewed

@@ -1,237 +1,22 @@
 import streamlit as st
 import pandas as pd
-import os
-import re
-import preprocessor as p
-import joblib
-import base64
-project_description = """
-# Hotel Data Analysis Project
-## Overview
-I have completed a hotel data analysis project using an instant web scraper.
-This project involved scraping hotel data and hotel reviews separately, cleaning the data,
-concatenating it, and performing sentiment analysis on the DataFrame.
-Additionally, I clustered the hotel reviews, applied sentiment analysis, and passed
-those clusters to an LLM (Language Model) to extract strengths and weaknesses of hotels.
-## Steps
-### 1. Scraping Hotel Data
-- Utilized an instant web scraper to collect hotel data.
-- Scraped hotel data separately from hotel reviews.
-### 2. Data Collection
-- Collected hotel data and hotel reviews data separately for each hotel.
-### 3. Data Cleaning
-- Cleaned the collected data to remove any inconsistencies or errors.
-- Applied preprocessing techniques to prepare the data for analysis.
-### 4. Data Concatenation
-- Concatenated the cleaned hotel data and hotel reviews data to create a unified dataset for analysis.
-### 5. Sentiment Analysis
-- Performed sentiment analysis on the concatenated DataFrame.
-- Utilized the results to understand the overall sentiment of hotel reviews.
-### 6. Clustering Hotel Reviews
-- Clustered the hotel reviews based on their content to identify patterns and similarities.
-### 7. Extracting Strengths and Weaknesses
-- Passed the clustered reviews to an LLM (Language Model) to extract strengths and weaknesses of hotels.
-- Used the extracted information to gain insights into customer perceptions.
-## Conclusion
-This project demonstrates the use of web scraping, data cleaning, sentiment analysis, and clustering techniques to analyze hotel data.
-The extracted strengths and weaknesses provide valuable insights for hotel management to improve customer satisfaction and service quality.
-"""
-def create_download_link(df, filename):
-    csv = df.to_csv(index=False)
-    b64 = base64.b64encode(csv.encode()).decode()
-    href = f'<a href="data:file/csv;base64,{b64}" download="{filename}.csv">Download CSV file</a>'
-    return href
-# Path to the directory containing CSV files
-directory_path = r'hotel reviews'
-# Get a list of CSV files in the directory
-csv_files = [file for file in os.listdir(directory_path) if file.endswith('.csv')]
-# Function to concatenate selected columns
-def concatenate_columns(df, selected_columns):
-    concatenated_data = df[selected_columns[0]].tolist() + df[selected_columns[1]].tolist()
-    return pd.DataFrame({'ConcatenatedData': concatenated_data})
-# Function to display selected dataset
-def display_selected_dataset(selected_dataset):
-    dataset_path = os.path.join(directory_path, selected_dataset)
-    selected_df = pd.read_csv(dataset_path)
-    st.subheader(f'Dataset: {selected_dataset}')
-    st.write(selected_df)
-def clean_tweets(series):
-    REPLACE_NO_SPACE = re.compile("[.;:!\'?,\"()\[\]]")
-    REPLACE_WITH_SPACE = re.compile("(<br\s*/><br\s*/>)|(\-)|(\/)")
-    tempArr = []
-    for line in series:
-        # Check if the value is NaN
-        if pd.isnull(line):
-            tempArr.append("")
-            continue
-        # Send to tweet_processor
-        tmpL = p.clean(line)
-        # Remove punctuation
-        tmpL = REPLACE_NO_SPACE.sub("", tmpL.lower())
-        # Replace specific characters with spaces
-        tmpL = REPLACE_WITH_SPACE.sub(" ", tmpL)
-        # Remove extra spaces
-        tmpL = " ".join(tmpL.split())
-        tempArr.append(tmpL)
-    return tempArr
-# Streamlit app
-def main():
-    # Create a menu bar
-    menu = st.sidebar.selectbox(
-        'Navigation',
-        ['Home', 'collected hotel data', 'Display Hotel Data', 'Display hotel reviews Datasets']
-    )
-    if menu == 'Home':
-        st.markdown(project_description)
-    elif menu == 'collected hotel data':
-        # Display DataFrame
-        df = pd.read_csv('chennai hotes.csv')
-        df1 = pd.read_csv('stream.csv')
-        st.subheader('Collected chennai hotes Data')
-        st.write(df)
-        st.subheader('preprocess applyed data')
-        st.write(df1)
-    elif menu == 'Display Hotel Data':
-        # Display hotel data
-        df = pd.read_csv('stream.csv')
-        css = """
-            <style>
-                .hotel-container {
-                    border: 1px solid #ddd;
-                    border-radius: 5px;
-                    padding: 10px;
-                    margin-bottom: 20px;
-                }
-                .hotel-image {
-                    max-width: 100%;
-                    border-radius: 5px;
-                    margin-bottom: 10px;
-                }
-                .hotel-details {
-                    font-size: 16px;
-                }
-            </style>
-        """
-        st.markdown(css, unsafe_allow_html=True)
-        for index, row in df.iterrows():
-            st.markdown(f"""
-        <div class="hotel-container">
-            <img class="hotel-image" src="{row['hotel image']}">
-            <div class="hotel-details">
-                <h2>{row['Hotel Name']}</h2>
-                <p><strong>Rating:</strong> {row['rating']}</p>
-                <p><strong>Location:</strong> {row['location']} ({row['nearest places']})</p>
-                <p><strong>Website:</strong> <a href="{row['hotel website']}">Website link</a></p>
-                <p><strong>Number of Reviews:</strong> {row['number of reviewss 2']}</p>
-                <p><strong>Room Type:</strong> {row['room type']}</p>
-                <p><strong>Price:</strong> {row['price']}</p>
-                <p><strong>Strengths:</strong> {row['Strengths']}</p>
-                <p><strong>Weaknesses:</strong> {row['Weaknesses']}</p>
-            </div>
-        </div>
-       """, unsafe_allow_html=True)
-    elif menu == 'Display hotel reviews Datasets':
-        selected_dataset = st.selectbox('Select Dataset', csv_files)
-        if selected_dataset:
-            display_selected_dataset(selected_dataset)
-    elif menu == 'CSV Column Concatenation and Sentiment Analysis':
-        st.title('CSV Column Concatenation and Sentiment Analysis')
-        new_names = {
-            'a3332d346a': 'Reviewer Name',
-            'afac1f68d9': 'Reviewer Country',
-            'abf093bdfe': 'Room Type',
-            'abf093bdfe 2': 'Length of Stay',
-            'abf093bdfe 3': 'Review Date',
-            'abf093bdfe 4': 'Traveler Type',
-            'abf093bdfe 5': 'Second Review Date',
-            'f6431b446c': 'Overall Rating',
-            'a53cbfa6de': 'Positive Comments',
-            'a53cbfa6de 2': 'Negative Comments',
-            'a3332d346a 2': 'Hotel Response',
-            'a53cbfa6de 3': 'Hotel Response1'
-        }
-        # File upload
-        uploaded_file = st.file_uploader('Upload CSV file', type=['csv'])
-        if uploaded_file is not None:
-            df = pd.read_csv(uploaded_file)
-            df.rename(columns=new_names, inplace=True)
-            # Show original DataFrame
-            st.subheader('Original DataFrame:')
-            st.write(df)
-            # Select columns
-            selected_columns = st.multiselect('Select columns to concatenate', df.columns)
-            if st.button('Concatenate columns'):
-                if len(selected_columns) == 2:
-                    # Concatenate columns
-                    new_df = concatenate_columns(df, selected_columns)
-                    # Remove null values
-                    new_df = new_df.dropna()
-                    # Drop duplicates
-                    new_df = new_df.drop_duplicates()
-                    # Reset the index
-                    new_df = new_df.reset_index(drop=True)
-                    # Clean tweets
-                    new_df['CleanedData'] = clean_tweets(new_df['ConcatenatedData'])
-                    # Load the saved model
-                    loaded_model = joblib.load('sentiment_analysis_model.pkl')
-                    # Apply sentiment analysis
-                    new_df['Sentiment'] = loaded_model.predict(new_df['CleanedData'])
-                    # Display concatenated, cleaned, and sentiment analyzed DataFrame
-                    st.subheader('Concatenated, Cleaned, and Sentiment Analyzed DataFrame:')
-                    st.write(new_df)
-                    # Create download link
-                    st.markdown(create_download_link(new_df, 'concatenated_sentiment_analyzed_data'), unsafe_allow_html=True)
-                else:
-                    st.warning('Please select exactly two columns to concatenate.')
-# Run the app
-if __name__ == '__main__':
-    main()

 import streamlit as st
 import pandas as pd
+# Load the CSV data
+file_path = 'category upwork jobs.csv'
+jobs_df = pd.read_csv(file_path)
+# Sidebar: Select Category
+st.sidebar.header("Filter Jobs by Category")
+categories = jobs_df['category'].unique()  # Extract unique categories
+selected_category = st.sidebar.selectbox("Choose a category:", categories)
+# Filter jobs based on the selected category
+filtered_jobs = jobs_df[jobs_df['category'] == selected_category]
+# Main: Display filtered jobs
+st.title("Jobs Dashboard")
+st.write(f"Showing jobs in category: **{selected_category}**")
+st.dataframe(filtered_jobs[['title']])  # Adjust columns as needed
+# Optional: Show a count of jobs in the selected category
+st.write(f"Total jobs in this category: {len(filtered_jobs)}")