# NOTE: listing-header residue ("Spaces: Sleeping", presumably from a hosted-Space page); not part of the program.
import pandas as pd
import torch
from sentence_transformers import SentenceTransformer

# Load the pre-trained sentence-embedding model; the same model encodes both
# the course descriptions below and the search queries.
model = SentenceTransformer('distilbert-base-nli-stsb-mean-tokens')

# Load the scraped course data from the working directory.
courses_df = pd.read_csv("courses_data.csv")

# Encode every course description once, at import time, and keep the resulting
# tensor next to its row so searches only have to encode the query.
# NOTE(review): assumes every 'description' cell is a non-null string; a NaN
# from an empty CSV cell would be passed to model.encode as-is -- confirm.
courses_df['embedding'] = courses_df['description'].apply(
    lambda x: model.encode(x, convert_to_tensor=True)
)


def search_courses(query, top_k=5):
    """Return the courses whose descriptions are most similar to *query*.

    The query is encoded with the module-level ``model`` and scored by cosine
    similarity against the tensors stored in ``courses_df['embedding']``.

    Args:
        query: Search text to encode.
        top_k: Maximum number of courses to return. Clamped to the number of
            rows in ``courses_df`` so asking for more than exist is not an
            error.

    Returns:
        A list of dicts with ``'title'``, ``'description'`` and
        ``'curriculum'`` keys, ordered from most to least similar. Empty when
        ``courses_df`` has no rows.
    """
    course_count = len(courses_df)
    if course_count == 0:
        # torch.stack rejects an empty list; there is nothing to rank anyway.
        return []

    query_embedding = model.encode(query, convert_to_tensor=True)
    course_embeddings = torch.stack(courses_df['embedding'].tolist())
    cosine_scores = torch.nn.functional.cosine_similarity(
        query_embedding, course_embeddings
    )

    # torch.topk raises if k is larger than the number of scores.
    top_results = torch.topk(cosine_scores, k=min(top_k, course_count))

    matches = (courses_df.iloc[idx] for idx in top_results.indices.tolist())
    return [
        {
            'title': course['title'],
            'description': course['description'],
            'curriculum': course['curriculum'],
        }
        for course in matches
    ]