redfernstech commited on
Commit
9576521
1 Parent(s): ae6a156

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +236 -88
app.py CHANGED
@@ -1,89 +1,237 @@
1
  import streamlit as st
2
- from streamlit_image_comparison import image_comparison
3
-
4
-
5
- IMAGE_TO_URL = {
6
- "sample_image_1": "https://user-images.githubusercontent.com/34196005/143309873-c0c1f31c-c42e-4a36-834e-da0a2336bb19.jpg",
7
- "sample_image_2": "https://user-images.githubusercontent.com/34196005/143309867-42841f5a-9181-4d22-b570-65f90f2da231.jpg",
8
- }
9
-
10
-
11
- st.set_page_config(
12
- page_title="Streamlit Image Comparison",
13
- page_icon="🔥",
14
- layout="centered",
15
- initial_sidebar_state="auto",
16
- )
17
-
18
- st.markdown(
19
- """
20
- <h2 style='text-align: center'>
21
- Streamlit Image Comparison Demo
22
- </h2>
23
- """,
24
- unsafe_allow_html=True,
25
- )
26
- st.markdown(
27
- """
28
- <p style='text-align: center'>
29
- <a href='https://github.com/fcakyon/streamlit-image-comparison' target='_blank'>https://github.com/fcakyon/streamlit-image-comparison</a>
30
- <br />
31
- Follow me for more! <a href='https://twitter.com/fcakyon' target='_blank'> <img src="https://img.icons8.com/color/48/000000/twitter--v1.png" height="30"></a><a href='https://github.com/fcakyon' target='_blank'><img src="https://img.icons8.com/fluency/48/000000/github.png" height="27"></a><a href='https://www.linkedin.com/in/fcakyon/' target='_blank'><img src="https://img.icons8.com/fluency/48/000000/linkedin.png" height="30"></a> <a href='https://fcakyon.medium.com/' target='_blank'><img src="https://img.icons8.com/ios-filled/48/000000/medium-monogram.png" height="26"></a>
32
- </p>
33
- """,
34
- unsafe_allow_html=True,
35
- )
36
-
37
- st.write("##")
38
-
39
- with st.form(key="Streamlit Image Comparison"):
40
- # image one inputs
41
- col1, col2 = st.columns([3, 1])
42
- with col1:
43
- img1_url = st.text_input("Image one URL:", value=IMAGE_TO_URL["sample_image_1"])
44
- with col2:
45
- img1_text = st.text_input("Image one text:", value="YOLOX")
46
-
47
- # image two inputs
48
- col1, col2 = st.columns([3, 1])
49
- with col1:
50
- img2_url = st.text_input("Image two URL:", value=IMAGE_TO_URL["sample_image_2"])
51
- with col2:
52
- img2_text = st.text_input("Image two text:", value="SAHI+YOLOX")
53
-
54
- # continious parameters
55
- col1, col2 = st.columns([1, 1])
56
- with col1:
57
- starting_position = st.slider(
58
- "Starting position of the slider:", min_value=0, max_value=100, value=50
59
- )
60
- with col2:
61
- width = st.slider(
62
- "Component width:", min_value=400, max_value=1000, value=700, step=100
63
- )
64
-
65
- # boolean parameters
66
- col1, col2, col3, col4 = st.columns([1, 3, 3, 3])
67
- with col2:
68
- show_labels = st.checkbox("Show labels", value=True)
69
- with col3:
70
- make_responsive = st.checkbox("Make responsive", value=True)
71
- with col4:
72
- in_memory = st.checkbox("In memory", value=True)
73
-
74
- # centered submit button
75
- col1, col2, col3 = st.columns([6, 4, 6])
76
- with col2:
77
- submit = st.form_submit_button("Update Render 🔥")
78
-
79
- static_component = image_comparison(
80
- img1=img1_url,
81
- img2=img2_url,
82
- label1=img1_text,
83
- label2=img2_text,
84
- width=width,
85
- starting_position=starting_position,
86
- show_labels=show_labels,
87
- make_responsive=make_responsive,
88
- in_memory=in_memory,
89
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ import pandas as pd
3
+ import os
4
+ import re
5
+ import preprocessor as p
6
+ import joblib
7
+ import base64
8
+
9
+
10
+
11
+ project_description = """
12
+ # Hotel Data Analysis Project
13
+
14
+ ## Overview
15
+
16
+ I have completed a hotel data analysis project using an instant web scraper.
17
+ This project involved scraping hotel data and hotel reviews separately, cleaning the data,
18
+ concatenating it, and performing sentiment analysis on the DataFrame.
19
+ Additionally, I clustered the hotel reviews, applied sentiment analysis, and passed
20
+ those clusters to an LLM (Language Model) to extract strengths and weaknesses of hotels.
21
+
22
+ ## Steps
23
+
24
+ ### 1. Scraping Hotel Data
25
+
26
+ - Utilized an instant web scraper to collect hotel data.
27
+ - Scraped hotel data separately from hotel reviews.
28
+
29
+ ### 2. Data Collection
30
+
31
+ - Collected hotel data and hotel reviews data separately for each hotel.
32
+
33
+ ### 3. Data Cleaning
34
+
35
+ - Cleaned the collected data to remove any inconsistencies or errors.
36
+ - Applied preprocessing techniques to prepare the data for analysis.
37
+
38
+ ### 4. Data Concatenation
39
+
40
+ - Concatenated the cleaned hotel data and hotel reviews data to create a unified dataset for analysis.
41
+
42
+ ### 5. Sentiment Analysis
43
+
44
+ - Performed sentiment analysis on the concatenated DataFrame.
45
+ - Utilized the results to understand the overall sentiment of hotel reviews.
46
+
47
+ ### 6. Clustering Hotel Reviews
48
+
49
+ - Clustered the hotel reviews based on their content to identify patterns and similarities.
50
+
51
+ ### 7. Extracting Strengths and Weaknesses
52
+
53
+ - Passed the clustered reviews to an LLM (Language Model) to extract strengths and weaknesses of hotels.
54
+ - Used the extracted information to gain insights into customer perceptions.
55
+
56
+ ## Conclusion
57
+
58
+ This project demonstrates the use of web scraping, data cleaning, sentiment analysis, and clustering techniques to analyze hotel data.
59
+ The extracted strengths and weaknesses provide valuable insights for hotel management to improve customer satisfaction and service quality.
60
+ """
61
def create_download_link(df, filename):
    """Build an HTML anchor that downloads ``df`` as a CSV file.

    The DataFrame is serialised with ``to_csv(index=False)``, base64-encoded
    and embedded in a ``data:`` URI.

    Args:
        df: DataFrame to export.
        filename: Download name without extension; ``.csv`` is appended.

    Returns:
        str: An ``<a>`` element whose ``href`` carries the encoded CSV.
    """
    import html  # local import: the module header does not import it

    csv_text = df.to_csv(index=False)
    b64 = base64.b64encode(csv_text.encode()).decode()
    # Escape the name so quotes or angle brackets cannot break out of the
    # ``download`` attribute.
    safe_name = html.escape(str(filename), quote=True)
    # ``text/csv`` is the registered media type for CSV data.
    return (
        f'<a href="data:text/csv;base64,{b64}" '
        f'download="{safe_name}.csv">Download CSV file</a>'
    )
66
+
67
# Path to the directory containing CSV files
directory_path = r'hotel reviews'

# Get a list of CSV files in the directory.  The listing is sorted so the
# dataset selector shows a stable order (os.listdir order is arbitrary), and a
# missing directory yields an empty list instead of crashing the app at import.
csv_files = (
    sorted(file for file in os.listdir(directory_path) if file.endswith('.csv'))
    if os.path.isdir(directory_path)
    else []
)
72
+
73
+ # Function to concatenate selected columns
74
def concatenate_columns(df, selected_columns):
    """Stack the first two selected columns into a one-column DataFrame.

    The values of ``selected_columns[0]`` come first, followed by those of
    ``selected_columns[1]``; the result has a single column named
    ``ConcatenatedData``.
    """
    stacked_values = df[selected_columns[0]].tolist()
    stacked_values.extend(df[selected_columns[1]].tolist())
    return pd.DataFrame({'ConcatenatedData': stacked_values})
77
+
78
+ # Function to display selected dataset
79
def display_selected_dataset(selected_dataset):
    """Read one CSV from ``directory_path`` and show it under a subheader.

    ``selected_dataset`` is the file name, joined onto ``directory_path``.
    """
    frame = pd.read_csv(os.path.join(directory_path, selected_dataset))
    st.subheader(f'Dataset: {selected_dataset}')
    st.write(frame)
84
def clean_tweets(series):
    """Normalise an iterable of review texts.

    Each entry is passed through ``p.clean`` (the ``preprocessor`` module
    imported by this file), lower-cased, stripped of punctuation, and has
    hyphens, slashes and doubled ``<br />`` tags replaced by a space; runs of
    whitespace are then collapsed.  Null entries become ``""`` so the output
    stays aligned with the input.

    Args:
        series: Iterable of values (e.g. a pandas Series); may contain nulls.

    Returns:
        list[str]: One cleaned string per input element, in input order.
    """
    # Raw strings: the previous non-raw literals relied on invalid escape
    # sequences such as "\[" and "\s", which newer Python versions warn about.
    replace_no_space = re.compile(r"""[.;:!'?,"()\[\]]""")
    replace_with_space = re.compile(r"(<br\s*/><br\s*/>)|(\-)|(\/)")

    cleaned = []
    for line in series:
        # Keep a placeholder for nulls so positions match the input.
        if pd.isnull(line):
            cleaned.append("")
            continue
        # Coerce to str so non-text cells (e.g. numeric columns) are cleaned
        # as text rather than relying on p.clean accepting non-str input.
        text = p.clean(str(line))
        # Remove punctuation
        text = replace_no_space.sub("", text.lower())
        # Replace specific characters with spaces
        text = replace_with_space.sub(" ", text)
        # Remove extra spaces
        cleaned.append(" ".join(text.split()))
    return cleaned
103
+
104
+ # Streamlit app
105
def _html_text(value):
    """Return ``value`` as text that is safe to embed in HTML markup."""
    import html  # local import: the module header does not import it

    return html.escape(str(value), quote=True)


def _show_collected_data():
    """Show the collected hotel table and the preprocessed table."""
    df = pd.read_csv('chennai hotes.csv')
    df1 = pd.read_csv('stream.csv')
    st.subheader('Collected chennai hotes Data')
    st.write(df)
    st.subheader('preprocess applyed data')
    st.write(df1)


def _show_hotel_cards():
    """Render every row of ``stream.csv`` as a styled HTML card."""
    df = pd.read_csv('stream.csv')
    # Markup is kept flush-left: Markdown treats lines indented by four or
    # more spaces as a code block, which would show the HTML as literal text.
    css = """
<style>
.hotel-container {
border: 1px solid #ddd;
border-radius: 5px;
padding: 10px;
margin-bottom: 20px;
}
.hotel-image {
max-width: 100%;
border-radius: 5px;
margin-bottom: 10px;
}
.hotel-details {
font-size: 16px;
}
</style>
"""
    st.markdown(css, unsafe_allow_html=True)
    esc = _html_text
    for _, row in df.iterrows():
        # Cell values come from a scraped CSV and are rendered with
        # unsafe_allow_html=True, so each one is HTML-escaped first.
        # NOTE(review): if 'Strengths'/'Weaknesses' are meant to carry HTML
        # markup, escaping will display it literally -- confirm.
        st.markdown(f"""
<div class="hotel-container">
<img class="hotel-image" src="{esc(row['hotel image'])}">
<div class="hotel-details">
<h2>{esc(row['Hotel Name'])}</h2>
<p><strong>Rating:</strong> {esc(row['rating'])}</p>
<p><strong>Location:</strong> {esc(row['location'])} ({esc(row['nearest places'])})</p>
<p><strong>Website:</strong> <a href="{esc(row['hotel website'])}">Website link</a></p>
<p><strong>Number of Reviews:</strong> {esc(row['number of reviewss 2'])}</p>
<p><strong>Room Type:</strong> {esc(row['room type'])}</p>
<p><strong>Price:</strong> {esc(row['price'])}</p>
<p><strong>Strengths:</strong> {esc(row['Strengths'])}</p>
<p><strong>Weaknesses:</strong> {esc(row['Weaknesses'])}</p>
</div>
</div>
""", unsafe_allow_html=True)


def _show_sentiment_page():
    """Upload a CSV, stack two chosen columns, clean them and predict sentiment."""
    st.title('CSV Column Concatenation and Sentiment Analysis')

    # Maps the scraped column headers to readable names.
    new_names = {
        'a3332d346a': 'Reviewer Name',
        'afac1f68d9': 'Reviewer Country',
        'abf093bdfe': 'Room Type',
        'abf093bdfe 2': 'Length of Stay',
        'abf093bdfe 3': 'Review Date',
        'abf093bdfe 4': 'Traveler Type',
        'abf093bdfe 5': 'Second Review Date',
        'f6431b446c': 'Overall Rating',
        'a53cbfa6de': 'Positive Comments',
        'a53cbfa6de 2': 'Negative Comments',
        'a3332d346a 2': 'Hotel Response',
        'a53cbfa6de 3': 'Hotel Response1',
    }

    # File upload
    uploaded_file = st.file_uploader('Upload CSV file', type=['csv'])
    if uploaded_file is None:
        return

    df = pd.read_csv(uploaded_file)
    df.rename(columns=new_names, inplace=True)

    # Show original DataFrame
    st.subheader('Original DataFrame:')
    st.write(df)

    # Select columns
    selected_columns = st.multiselect('Select columns to concatenate', df.columns)

    if not st.button('Concatenate columns'):
        return
    if len(selected_columns) != 2:
        st.warning('Please select exactly two columns to concatenate.')
        return

    # Stack the two columns, then drop nulls and duplicates and renumber rows.
    new_df = concatenate_columns(df, selected_columns)
    new_df = new_df.dropna().drop_duplicates().reset_index(drop=True)
    if new_df.empty:
        # Nothing left to classify; avoid handing the model an empty input.
        st.warning('The selected columns contain no values to analyse.')
        return

    # Clean tweets
    new_df['CleanedData'] = clean_tweets(new_df['ConcatenatedData'])

    # NOTE(security): joblib.load un-pickles the file and can execute
    # arbitrary code; only ship a model file from a trusted source.
    loaded_model = joblib.load('sentiment_analysis_model.pkl')

    # Apply sentiment analysis
    new_df['Sentiment'] = loaded_model.predict(new_df['CleanedData'])

    # Display concatenated, cleaned, and sentiment analyzed DataFrame
    st.subheader('Concatenated, Cleaned, and Sentiment Analyzed DataFrame:')
    st.write(new_df)

    # Create download link
    st.markdown(
        create_download_link(new_df, 'concatenated_sentiment_analyzed_data'),
        unsafe_allow_html=True,
    )


def main():
    """Run the Streamlit app: draw the sidebar menu and the selected page."""
    # Create a menu bar
    menu = st.sidebar.selectbox(
        'Navigation',
        [
            'Home',
            'collected hotel data',
            'Display Hotel Data',
            'Display hotel reviews Datasets',
            'CSV Column Concatenation and Sentiment Analysis',
        ],
    )

    if menu == 'Home':
        st.markdown(project_description)
    elif menu == 'collected hotel data':
        _show_collected_data()
    elif menu == 'Display Hotel Data':
        _show_hotel_cards()
    elif menu == 'Display hotel reviews Datasets':
        selected_dataset = st.selectbox('Select Dataset', csv_files)
        # selectbox yields nothing selectable when csv_files is empty.
        if selected_dataset:
            display_selected_dataset(selected_dataset)
    elif menu == 'CSV Column Concatenation and Sentiment Analysis':
        _show_sentiment_page()


# Run the app
if __name__ == '__main__':
    main()