Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -30,9 +30,16 @@ def generate_embedding(text, tokenizer, model, device):
|
|
30 |
|
31 |
# Load dataset
|
32 |
@st.cache_data
|
33 |
-
def load_data():
|
34 |
dataset = load_dataset("frankjosh/filtered_dataset", split="train")
|
35 |
df = pd.DataFrame(dataset).head(500) # Limit to 500 repositories
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
return df
|
37 |
|
38 |
def fetch_readme(repo_url):
|
@@ -54,7 +61,7 @@ def main():
|
|
54 |
|
55 |
# Load resources
|
56 |
tokenizer, model, device = load_model()
|
57 |
-
data = load_data()
|
58 |
|
59 |
# Input user query
|
60 |
user_query = st.text_input("Describe your project or learning goal:",
|
|
|
30 |
|
31 |
# Load dataset
|
32 |
@st.cache_data
def load_data(_tokenizer, _model, _device):
    """Load the filtered dataset and precompute one embedding per repository.

    The parameters are prefixed with an underscore so Streamlit's
    ``st.cache_data`` skips hashing them: tokenizers, torch models and
    devices are unhashable, and without the prefix the decorator raises
    ``UnhashableParamError`` on the first call. Callers that pass the
    arguments positionally — ``load_data(tokenizer, model, device)`` —
    are unaffected by the rename.

    Args:
        _tokenizer: tokenizer passed through to ``generate_embedding``.
        _model: model passed through to ``generate_embedding``.
        _device: device passed through to ``generate_embedding``.

    Returns:
        pandas.DataFrame: the first 500 rows of
        ``frankjosh/filtered_dataset`` with an added ``embedding`` column.
    """
    dataset = load_dataset("frankjosh/filtered_dataset", split="train")
    df = pd.DataFrame(dataset).head(500)  # Limit to 500 repositories

    # Generate an embedding for each row from its docstring + summary;
    # rows missing either field fall back to an empty-string embedding.
    def compute_embedding(row):
        text = f"{row['docstring']} {row['summary']}" if 'docstring' in row and 'summary' in row else ""
        return generate_embedding(text, _tokenizer, _model, _device)

    df['embedding'] = df.apply(compute_embedding, axis=1)
    return df
|
44 |
|
45 |
def fetch_readme(repo_url):
|
|
|
61 |
|
62 |
# Load resources
|
63 |
tokenizer, model, device = load_model()
|
64 |
+
data = load_data(tokenizer, model, device)
|
65 |
|
66 |
# Input user query
|
67 |
user_query = st.text_input("Describe your project or learning goal:",
|