"""Streamlit search engine for car listings ranked with LSI (TF-IDF + truncated SVD)."""

import html

import pandas as pd
import streamlit as st
from sklearn.decomposition import TruncatedSVD
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import normalize

# Columns concatenated into the searchable 'content' text of each listing.
CONTENT_COLUMNS = ['Marque', 'Modèle', 'Type de carburant', 'Boite de vitesses']
CARDS_PER_ROW = 3
n_components = 50  # Requested number of LSI dimensions (capped to the vocabulary size)


@st.cache_resource
def build_index(csv_path='avito_cars.csv', max_components=n_components):
    """Load the listings and fit the TF-IDF + LSI model once per process.

    Streamlit re-executes the whole script on every widget interaction, so
    the CSV load and model fitting are cached instead of being repeated.

    Args:
        csv_path: Path of the CSV file holding the listings.
        max_components: Upper bound on the number of LSI dimensions.

    Returns:
        A ``(dataset, vectorizer, svd, lsi_matrix)`` tuple where row ``i`` of
        ``lsi_matrix`` is the L2-normalised LSI vector of ``dataset.iloc[i]``.
    """
    listings = pd.read_csv(csv_path)

    # Missing cells would turn the concatenated text into NaN, which
    # TfidfVectorizer rejects; treat them as empty strings instead.
    parts = listings[CONTENT_COLUMNS].fillna('').astype(str)
    listings['content'] = parts[CONTENT_COLUMNS[0]].str.cat(
        [parts[column] for column in CONTENT_COLUMNS[1:]], sep=" "
    )
    listings = listings.drop_duplicates(subset=['content'])  # Remove duplicates

    tfidf = TfidfVectorizer(stop_words=None)
    tfidf_matrix = tfidf.fit_transform(listings['content'])

    # TruncatedSVD cannot extract more components than there are features.
    components = max(1, min(max_components, tfidf_matrix.shape[1] - 1))
    # Fixed seed so rankings are reproducible across restarts.
    lsi = TruncatedSVD(n_components=components, random_state=42)
    doc_vectors = normalize(lsi.fit_transform(tfidf_matrix))
    return listings, tfidf, lsi, doc_vectors


dataset, vectorizer, svd, lsi_matrix = build_index()


def search(query, top_n=100):
    """Rank the listings by cosine similarity to ``query`` in LSI space.

    Args:
        query: Free-text query; it is vectorised with the fitted TF-IDF model.
        top_n: Maximum number of listings to return.

    Returns:
        A ``(results, scores)`` tuple: the best-matching rows of ``dataset``
        in decreasing order of similarity, and their similarity scores.
    """
    query_tfidf = vectorizer.transform([query])
    query_lsi = normalize(svd.transform(query_tfidf))
    similarities = cosine_similarity(query_lsi, lsi_matrix).flatten()
    # argsort is ascending: keep the last top_n entries and reverse them.
    top_indices = similarities.argsort()[-top_n:][::-1]
    return dataset.iloc[top_indices], similarities[top_indices]


def _reset_page():
    """Return to the first page (used when the query or page size changes)."""
    st.session_state.page = 1


def _shift_page(delta, total_pages):
    """Move the current page by ``delta``, clamped to ``1..total_pages``."""
    st.session_state.page = min(max(1, st.session_state.page + delta), total_pages)


def _card_markdown(row):
    """Build the markdown shown for one listing card.

    Values come from the CSV and are rendered with ``unsafe_allow_html=True``,
    so they are HTML-escaped before being interpolated.
    """
    def esc(value):
        return html.escape(str(value))

    link = row['Lien']
    if isinstance(link, str) and link.startswith(("http://", "https://")):
        details = (
            f'<a href="{html.escape(link, quote=True)}" target="_blank" '
            f'rel="noopener noreferrer">View Details</a>'
        )
    else:
        # No usable URL: fall back to plain text rather than a broken link.
        details = "View Details"

    return "\n\n".join(
        [
            esc(row['content']),
            f"Année-Modèle: {esc(row['Année-Modèle'])}",
            f"Price: {esc(row['Prix'])} MAD",
            f"City: {esc(row['Ville'])}",
            f"Kilométrage: {esc(row['Kilométrage'])} km",
            details,
        ]
    )


# Streamlit Interface
st.title("Moteur de recherche de voitures basé sur le LSI (Latent Semantic Indexing)")
st.write("Recherchez des voitures en utilisant des mots-clés (par ex. : 'Peugeot Diesel Manuelle').")

# Pagination state
if "page" not in st.session_state:
    st.session_state.page = 1

# User input; changing either widget invalidates the current page number.
query = st.text_input("Entrez votre requête de recherche :", on_change=_reset_page)
top_n = st.slider(
    "Nombre de résultats à afficher par page :",
    min_value=3,
    max_value=12,
    step=3,
    value=6,
    on_change=_reset_page,
)
search_clicked = st.button("Search", on_click=_reset_page)

# Search and display
if query.strip():
    results, similarities = search(query)
    total_results = len(results)
    results_per_page = top_n
    total_pages = max(1, -(-total_results // results_per_page))  # ceiling division

    # Clamp in case the stored page no longer exists for this result set.
    current_page = min(max(1, st.session_state.page), total_pages)
    st.session_state.page = current_page

    # Paginate results
    start_idx = (current_page - 1) * results_per_page
    end_idx = start_idx + results_per_page
    paginated_results = results.iloc[start_idx:end_idx]

    st.write(f"Showing results {start_idx + 1}-{min(end_idx, total_results)} of {total_results} (Page {current_page}/{total_pages}):")

    # Display cards in rows using Streamlit's `st.columns()`
    for i, (_, row) in enumerate(paginated_results.iterrows()):
        if i % CARDS_PER_ROW == 0:  # Create a new row of cards
            cols = st.columns(CARDS_PER_ROW)
        with cols[i % CARDS_PER_ROW]:
            st.markdown(_card_markdown(row), unsafe_allow_html=True)

    # Pagination controls. Callbacks run before the next rerun, so the page
    # shown always reflects the click (mutating state after rendering would
    # leave the display one click behind).
    st.write("Navigation:")
    col1, _, col3 = st.columns(3)
    with col1:
        st.button(
            "Previous",
            on_click=_shift_page,
            args=(-1, total_pages),
            disabled=current_page <= 1,
        )
    with col3:
        st.button(
            "Next",
            on_click=_shift_page,
            args=(1, total_pages),
            disabled=current_page >= total_pages,
        )
elif search_clicked:
    # An empty query has no terms to match; searching would return arbitrary rows.
    st.warning("Veuillez saisir une requête de recherche.")