Maarten Van Segbroeck committed on
Commit
c6db2b5
·
verified ·
1 Parent(s): 3615cd5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -15
app.py CHANGED
@@ -90,10 +90,6 @@ def process_pdfs(uploaded_files, use_example, chunk_size, chunk_overlap, min_chu
90
  current_chunk = len(chunks) - 1
91
 
92
  chunk_text = chunks[current_chunk] if chunks else "No chunks available."
93
- # use_example_update = gr.update(
94
- # value=False,
95
- # interactive=uploaded_files is None or len(uploaded_files) == 0
96
- # )
97
 
98
  return pdf_chunks_dict, selected_pdfs, chunk_text, current_chunk#, use_example_update
99
 
@@ -151,18 +147,35 @@ def generate_synthetic_records(api_key, pdf_chunks_dict, num_records):
151
  }
152
 
153
  df_in = pd.DataFrame()
154
- documents = list(pdf_chunks_dict.keys())
155
- all_chunks = [(doc, chunk) for doc in documents for chunk in pdf_chunks_dict[doc]]
 
 
 
 
 
 
 
 
 
156
 
157
- for _ in range(num_records):
158
- doc, chunk = random.choice(all_chunks)
159
- df_doc = pd.DataFrame({'document': [doc], 'text': [chunk]})
160
- df_in = pd.concat([df_in, df_doc], ignore_index=True)
161
 
162
- df = navigator.edit(PROMPT, seed_data=df_in, **GENERATE_PARAMS)
163
- df = df.drop(columns=['text'])
164
 
165
- return gr.update(value=df, visible=True)
 
 
 
 
 
 
 
 
 
 
 
166
 
167
  # CSS styling to center the logo and prevent right-click download
168
  css = """
@@ -273,6 +286,7 @@ with gr.Blocks() as demo:
273
  num_records = gr.Number(label="Number of Records", value=10)
274
 
275
  generate_button = gr.Button("Generate Synthetic Records", interactive=False)
 
276
 
277
  # Validate API key on input change and update button interactivity
278
  api_key_input.change(
@@ -283,10 +297,14 @@ with gr.Blocks() as demo:
283
 
284
  output_df = gr.Dataframe(headers=["document", "topic", "user_profile", "question", "answer", "context"], wrap=True, visible=True)
285
 
 
 
 
 
286
  generate_button.click(
287
- fn=generate_synthetic_records,
288
  inputs=[api_key_input, pdf_chunks_dict, num_records],
289
- outputs=[output_df]
290
  )
291
 
292
  demo.launch()
 
90
  current_chunk = len(chunks) - 1
91
 
92
  chunk_text = chunks[current_chunk] if chunks else "No chunks available."
 
 
 
 
93
 
94
  return pdf_chunks_dict, selected_pdfs, chunk_text, current_chunk#, use_example_update
95
 
 
147
  }
148
 
149
  df_in = pd.DataFrame()
150
+ try:
151
+ documents = list(pdf_chunks_dict.keys())
152
+ all_chunks = [(doc, chunk) for doc in documents for chunk in pdf_chunks_dict[doc]]
153
+
154
+ for _ in range(num_records):
155
+ doc, chunk = random.choice(all_chunks)
156
+ df_doc = pd.DataFrame({'document': [doc], 'text': [chunk]})
157
+ df_in = pd.concat([df_in, df_doc], ignore_index=True)
158
+
159
+ df = navigator.edit(PROMPT, seed_data=df_in, **GENERATE_PARAMS)
160
+ df = df.drop(columns=['text'])
161
 
162
+ csv_file = os.path.join(output_dir, "synthetic_qa.csv")
163
+ df.to_csv(csv_file, index=False)
 
 
164
 
165
+ return gr.update(value=df, visible=True), csv_file
 
166
 
167
+ except:
168
+ return gr.update(value=None, visible=False), None
169
+
170
+ # def generate_and_show_download_button(api_key, pdf_chunks_dict, num_records):
171
+ # df = generate_synthetic_records(api_key, pdf_chunks_dict, num_records)
172
+ # csv_file = download_dataframe(df)
173
+ # return df, gr.update(visible=True), csv_file
174
+
175
def download_dataframe(df):
    """Save *df* as ``synthetic_qa.csv`` inside ``output_dir`` and return the file path.

    The CSV is written without the index column.
    """
    target_path = os.path.join(output_dir, "synthetic_qa.csv")
    df.to_csv(target_path, index=False)
    return target_path
179
 
180
  # CSS styling to center the logo and prevent right-click download
181
  css = """
 
286
  num_records = gr.Number(label="Number of Records", value=10)
287
 
288
  generate_button = gr.Button("Generate Synthetic Records", interactive=False)
289
+ download_link = gr.File(label="Download Link", visible=False)
290
 
291
  # Validate API key on input change and update button interactivity
292
  api_key_input.change(
 
297
 
298
  output_df = gr.Dataframe(headers=["document", "topic", "user_profile", "question", "answer", "context"], wrap=True, visible=True)
299
 
300
def generate_and_prepare_download(api_key, pdf_chunks_dict, num_records):
    """Generate synthetic records and build the updates for both click outputs.

    Returns a pair: the table update produced by
    ``generate_synthetic_records`` and a ``gr.update`` for the download
    component that carries the CSV path.
    """
    table_update, csv_file = generate_synthetic_records(api_key, pdf_chunks_dict, num_records)
    # generate_synthetic_records returns None as the CSV path when it fails,
    # so the path doubles as the success flag. Testing it with `is not None`
    # always yields a plain bool; comparing the table value with `!= None`
    # is evaluated element-wise when that value is a DataFrame.
    return table_update, gr.update(value=csv_file, visible=csv_file is not None)
303
+
304
  generate_button.click(
305
+ fn=generate_and_prepare_download,
306
  inputs=[api_key_input, pdf_chunks_dict, num_records],
307
+ outputs=[output_df, download_link]
308
  )
309
 
310
  demo.launch()