lambdaofgod committed on
Commit
6b3fd91
·
1 Parent(s): e2e1dca

remove duplicates

Browse files
Files changed (1) hide show
  1. pages/1_Retrieval_App.py +6 -1
pages/1_Retrieval_App.py CHANGED
@@ -130,7 +130,12 @@ def app_main(
130
  ):
131
  print("loading data")
132
 
133
- retrieval_df = datasets.load_dataset(data_path)["train"].to_pandas()
 
 
 
 
 
134
  print("setting up retrieval_pipe")
135
  doc_col = "dependencies"
136
  retrieval_pipeline = setup_retrieval_pipeline(
 
130
  ):
131
  print("loading data")
132
 
133
+ retrieval_df = (
134
+ datasets.load_dataset(data_path)["train"]
135
+ .to_pandas()
136
+ .drop_duplicates(subset=["repo"])
137
+ .reset_index(drop=True)
138
+ )
139
  print("setting up retrieval_pipe")
140
  doc_col = "dependencies"
141
  retrieval_pipeline = setup_retrieval_pipeline(