frankjosh committed on
Commit
bdd7b82
·
verified ·
1 Parent(s): ad91929

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -2
app.py CHANGED
@@ -30,9 +30,16 @@ def generate_embedding(text, tokenizer, model, device):
30
 
31
  # Load dataset
32
  @st.cache_data
33
- def load_data():
34
  dataset = load_dataset("frankjosh/filtered_dataset", split="train")
35
  df = pd.DataFrame(dataset).head(500) # Limit to 500 repositories
 
 
 
 
 
 
 
36
  return df
37
 
38
  def fetch_readme(repo_url):
@@ -54,7 +61,7 @@ def main():
54
 
55
  # Load resources
56
  tokenizer, model, device = load_model()
57
- data = load_data()
58
 
59
  # Input user query
60
  user_query = st.text_input("Describe your project or learning goal:",
 
30
 
31
  # Load dataset
32
  @st.cache_data
33
+ def load_data(tokenizer, model, device):
34
  dataset = load_dataset("frankjosh/filtered_dataset", split="train")
35
  df = pd.DataFrame(dataset).head(500) # Limit to 500 repositories
36
+
37
+ # Generate embeddings for each row
38
+ def compute_embedding(row):
39
+ text = f"{row['docstring']} {row['summary']}" if 'docstring' in row and 'summary' in row else ""
40
+ return generate_embedding(text, tokenizer, model, device)
41
+
42
+ df['embedding'] = df.apply(compute_embedding, axis=1)
43
  return df
44
 
45
  def fetch_readme(repo_url):
 
61
 
62
  # Load resources
63
  tokenizer, model, device = load_model()
64
+ data = load_data(tokenizer, model, device)
65
 
66
  # Input user query
67
  user_query = st.text_input("Describe your project or learning goal:",