"""Semantic search over course descriptions using sentence embeddings."""

from sentence_transformers import SentenceTransformer
import pandas as pd
import torch

# Load pre-trained model for sentence embedding
model = SentenceTransformer('distilbert-base-nli-stsb-mean-tokens')

# Load scraped courses data
courses_df = pd.read_csv("courses_data.csv")

# Encode course descriptions (one tensor per row, stored in an object column)
courses_df['embedding'] = courses_df['description'].apply(
    lambda x: model.encode(x, convert_to_tensor=True)
)


def search_courses(query, top_k=5):
    """Return the courses whose descriptions are most similar to *query*.

    The query is embedded with the module-level ``model`` and compared by
    cosine similarity against every tensor in ``courses_df['embedding']``.

    Args:
        query: Free-text search string.
        top_k: Maximum number of results to return. Values larger than the
            number of courses are clamped, so fewer results may come back.

    Returns:
        A list of dicts with the keys ``'title'``, ``'description'`` and
        ``'curriculum'``, ordered from most to least similar. The list is
        empty when ``courses_df`` has no rows.

    Raises:
        RuntimeError: Propagated from ``torch.topk`` when ``top_k`` is
            negative.
    """
    # torch.stack() rejects an empty list, so bail out before doing any work.
    if courses_df.empty:
        return []

    query_embedding = model.encode(query, convert_to_tensor=True)
    course_embeddings = torch.stack(courses_df['embedding'].tolist())

    # Make the query an explicit (1, dim) row so it broadcasts against the
    # (n_courses, dim) matrix along dim=1.
    cosine_scores = torch.nn.functional.cosine_similarity(
        query_embedding.unsqueeze(0), course_embeddings, dim=1
    )

    # torch.topk raises if k exceeds the number of scores; clamp instead.
    k = min(top_k, cosine_scores.shape[0])
    top_results = torch.topk(cosine_scores, k=k)

    results = []
    for idx in top_results.indices.tolist():
        course = courses_df.iloc[idx]
        results.append({
            'title': course['title'],
            'description': course['description'],
            'curriculum': course['curriculum'],
        })
    return results