Spaces:
Runtime error
Runtime error
Commit
·
6b3fd91
1
Parent(s):
e2e1dca
remove duplicates
Browse files
- pages/1_Retrieval_App.py +6 -1
pages/1_Retrieval_App.py
CHANGED
@@ -130,7 +130,12 @@ def app_main(
|
|
130 |
):
|
131 |
print("loading data")
|
132 |
|
133 |
-
retrieval_df =
|
|
|
|
|
|
|
|
|
|
|
134 |
print("setting up retrieval_pipe")
|
135 |
doc_col = "dependencies"
|
136 |
retrieval_pipeline = setup_retrieval_pipeline(
|
|
|
130 |
):
|
131 |
print("loading data")
|
132 |
|
133 |
+
retrieval_df = (
|
134 |
+
datasets.load_dataset(data_path)["train"]
|
135 |
+
.to_pandas()
|
136 |
+
.drop_duplicates(subset=["repo"])
|
137 |
+
.reset_index(drop=True)
|
138 |
+
)
|
139 |
print("setting up retrieval_pipe")
|
140 |
doc_col = "dependencies"
|
141 |
retrieval_pipeline = setup_retrieval_pipeline(
|