eaglelandsonce committed on
Commit
86c33f7
·
1 Parent(s): 0a141aa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -1
app.py CHANGED
@@ -26,6 +26,10 @@ if __name__ == "__main__":
26
  # call the chunk size method that sets the number
27
  chunk_size = st.number_input('Chunk size:', min_value=100, max_value=2048, value=512, on_change=clear_history)
28
 
 
 
 
 
29
  # input the top-k number; a larger k increases search effectiveness but is more expensive
30
  k = st.number_input('top-k most salient docs', min_value=1, max_value=20, value=3, on_change=clear_history)
31
 
@@ -43,7 +47,7 @@ if __name__ == "__main__":
43
  f.write(bytes_data)
44
 
45
  data = load_document(file_name)
46
- chunks = chunk_data(data, chunk_size=chunk_size)
47
  st.write(f'Chunk size: {chunk_size}, Chunks: {len(chunks)}')
48
 
49
  tokens, embedding_cost = calculate_embedding_cost(chunks)
 
26
  # call the chunk size method that sets the number
27
  chunk_size = st.number_input('Chunk size:', min_value=100, max_value=2048, value=512, on_change=clear_history)
28
 
29
+ # chunk overlap
30
+ chunk_overlap = st.number_input('Chunk Overlap:', min_value=0, max_value=200, value=20, on_change=clear_history)
31
+
32
+
33
  # input the top-k number; a larger k increases search effectiveness but is more expensive
34
  k = st.number_input('top-k most salient docs', min_value=1, max_value=20, value=3, on_change=clear_history)
35
 
 
47
  f.write(bytes_data)
48
 
49
  data = load_document(file_name)
50
+ chunks = chunk_data(data, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
51
  st.write(f'Chunk size: {chunk_size}, Chunks: {len(chunks)}')
52
 
53
  tokens, embedding_cost = calculate_embedding_cost(chunks)