sanaa-11 committed on
Commit
2c3629a
·
verified ·
1 Parent(s): 95b030f

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +123 -0
  2. avito_cars.csv +0 -0
app.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from sklearn.metrics.pairwise import cosine_similarity
3
+ from sklearn.preprocessing import normalize
4
+ from sklearn.decomposition import TruncatedSVD
5
+ from sklearn.feature_extraction.text import TfidfVectorizer
6
+ import pandas as pd
7
+
8
# Load the car listings; the CSV is read from the current working directory.
dataset = pd.read_csv('avito_cars.csv')

# Columns whose text is concatenated into the searchable 'content' field.
CONTENT_COLUMNS = ['Marque', 'Modèle', 'Type de carburant', 'Boite de vitesses']

# Missing cells become empty strings and every value is cast to str before
# concatenation: with plain `+`, a single NaN cell turns the whole row's
# content into NaN, which TfidfVectorizer refuses to vectorize.
content_parts = dataset[CONTENT_COLUMNS].fillna('').astype(str)
dataset['content'] = (
    content_parts['Marque'] + " " +
    content_parts['Modèle'] + " " +
    content_parts['Type de carburant'] + " " +
    content_parts['Boite de vitesses']
)
dataset = dataset.drop_duplicates(subset=['content'])  # Remove duplicates

# TF-IDF representation of every listing (one row per remaining listing).
vectorizer = TfidfVectorizer(stop_words=None)
tfidf_matrix = vectorizer.fit_transform(dataset['content'])

# Number of LSI dimensions: 50, capped at the vocabulary size because
# TruncatedSVD cannot extract more components than there are features.
n_components = min(50, tfidf_matrix.shape[1])

# A fixed random_state keeps the projection identical across Streamlit reruns
# (the script is re-executed on every interaction), so the ranking does not
# shift while the user pages through results.
svd = TruncatedSVD(n_components=n_components, random_state=42)

# Row-normalised LSI vectors, positionally aligned with `dataset`.
lsi_matrix = normalize(svd.fit_transform(tfidf_matrix))
28
+
29
+ # Search function
30
def search(query, top_n=100):
    """Rank the indexed listings against *query* in the LSI space.

    Args:
        query: Free-text keywords, e.g. "Peugeot Diesel Manuelle".
        top_n: Maximum number of listings to return. Values <= 0 return
            no listings.

    Returns:
        A ``(results, scores)`` tuple: the matching rows of ``dataset``
        ordered from most to least similar, and the cosine similarity of
        each returned row (same order). Listings whose similarity is not
        strictly positive are left out, so a query that shares no term with
        the indexed vocabulary yields an empty result instead of arbitrary
        rows.
    """
    # Project the query into the same normalised LSI space as the listings.
    query_tfidf = vectorizer.transform([query])
    query_lsi = normalize(svd.transform(query_tfidf))
    similarities = cosine_similarity(query_lsi, lsi_matrix).flatten()

    # Clamp so that 0 or a negative top_n cannot turn into a "take all"
    # or an offset slice.
    limit = max(int(top_n), 0)
    ranked = similarities.argsort()[::-1][:limit]

    # Drop listings that are unrelated to the query (similarity <= 0).
    ranked = ranked[similarities[ranked] > 0]

    # `lsi_matrix` rows are positionally aligned with `dataset`, hence iloc.
    return dataset.iloc[ranked], similarities[ranked]
38
+
39
# Streamlit Interface
st.title("Moteur de recherche de voitures basé sur le LSI (Latent Semantic Indexing)")
st.write("Recherchez des voitures en utilisant des mots-clés (par ex. : 'Peugeot Diesel Manuelle').")

# Number of result cards laid out side by side.
CARDS_PER_ROW = 3

# HTML for one result card. It contains no blank line so that Markdown keeps
# treating the whole card as a single raw-HTML block.
_CARD_TEMPLATE = """\
<div style="
border: 1px solid green;
border-radius: 10px;
padding: 10px;
background-color: #f9f9f9;
text-align: left;
height: auto;
margin-bottom: 20px;
">
<h5>{content}</h5>
<p><strong>Année-Modèle:</strong> {year}</p>
<p><strong>Price:</strong> {price} MAD</p>
<p><strong>City:</strong> {city}</p>
<p><strong>Kilométrage:</strong> {mileage} km</p>
<a href="{link}" target="_blank" style="
display: block;
margin: 10px auto 0 auto;
background-color: #4CAF50;
color: white;
padding: 5px 10px;
text-align: center;
text-decoration: none;
border-radius: 5px;">
View Details
</a>
</div>
"""


def _card_html(row):
    """Return the HTML card for one listing row, escaping every CSV value.

    The card is rendered with ``unsafe_allow_html=True``, so values coming
    from the CSV must not be able to inject markup.
    """
    from html import escape

    def cell(column):
        return escape(str(row[column]))

    return _CARD_TEMPLATE.format(
        content=cell('content'),
        year=cell('Année-Modèle'),
        price=cell('Prix'),
        city=cell('Ville'),
        mileage=cell('Kilométrage'),
        link=escape(str(row['Lien']), quote=True),
    )


def _shift_page(delta):
    """Button callback: move the current page by *delta*, never below 1.

    Callbacks run before the script is re-executed, so the new page is
    already in place when the results are drawn. The upper bound is applied
    where the total page count is known.
    """
    st.session_state.page = max(1, st.session_state.page + delta)


# User input
query = st.text_input("Entrez votre requête de recherche :")
top_n = st.slider("Nombre de résultats à afficher par page :", min_value=3, max_value=12, step=3, value=6)

# Pagination state
if "page" not in st.session_state:
    st.session_state.page = 1

cleaned_query = query.strip()
search_clicked = st.button("Search")

# A different query or page size makes the previous page number meaningless,
# so start again from the first page.
search_signature = (cleaned_query, top_n)
if (
    "search_signature" not in st.session_state
    or st.session_state.search_signature != search_signature
):
    st.session_state.search_signature = search_signature
    st.session_state.page = 1

# Search and display
if not cleaned_query:
    # Only complain when the user explicitly asked for a search.
    if search_clicked:
        st.warning("Veuillez entrer une requête de recherche.")
else:
    results, _ = search(cleaned_query)
    total_results = len(results)

    if total_results == 0:
        st.info("Aucun résultat trouvé pour cette requête.")
    else:
        results_per_page = top_n
        total_pages = -(-total_results // results_per_page)  # ceiling division

        # Keep the page inside [1, total_pages]; "Next" may have overshot.
        st.session_state.page = min(max(st.session_state.page, 1), total_pages)
        page = st.session_state.page

        # Paginate results
        start_idx = (page - 1) * results_per_page
        end_idx = min(start_idx + results_per_page, total_results)
        paginated_results = results.iloc[start_idx:end_idx]
        st.write(f"Showing results {start_idx + 1}-{end_idx} of {total_results} (Page {page}/{total_pages}):")

        # Display cards in a grid, opening a new row of columns as needed.
        for position, (_, row) in enumerate(paginated_results.iterrows()):
            if position % CARDS_PER_ROW == 0:
                cols = st.columns(CARDS_PER_ROW)
            with cols[position % CARDS_PER_ROW]:
                st.markdown(_card_html(row), unsafe_allow_html=True)

        # Pagination controls
        st.write("Navigation:")
        col1, _, col3 = st.columns(3)
        with col1:
            st.button("Previous", on_click=_shift_page, args=(-1,))
        with col3:
            st.button("Next", on_click=_shift_page, args=(1,))
avito_cars.csv ADDED
The diff for this file is too large to render. See raw diff