Commit
·
86c33f7
1
Parent(s):
0a141aa
Update app.py
Browse files
app.py
CHANGED
@@ -26,6 +26,10 @@ if __name__ == "__main__":
|
|
26 |
# call the chunk size method that sets the number
|
27 |
chunk_size = st.number_input('Chunk size:', min_value=100, max_value=2048, value=512, on_change=clear_history)
|
28 |
|
|
|
|
|
|
|
|
|
29 |
# input the top-k number; a larger k increases search effectiveness but is more expensive
|
30 |
k = st.number_input('top-k most salient docs', min_value=1, max_value=20, value=3, on_change=clear_history)
|
31 |
|
@@ -43,7 +47,7 @@ if __name__ == "__main__":
|
|
43 |
f.write(bytes_data)
|
44 |
|
45 |
data = load_document(file_name)
|
46 |
-
chunks = chunk_data(data, chunk_size=chunk_size)
|
47 |
st.write(f'Chunk size: {chunk_size}, Chunks: {len(chunks)}')
|
48 |
|
49 |
tokens, embedding_cost = calculate_embedding_cost(chunks)
|
|
|
26 |
# call the chunk size method that sets the number
|
27 |
chunk_size = st.number_input('Chunk size:', min_value=100, max_value=2048, value=512, on_change=clear_history)
|
28 |
|
29 |
+
# chunk overlap
|
30 |
+
chunk_overlap = st.number_input('Chunk Overlap:', min_value=0, max_value=200, value=20, on_change=clear_history)
|
31 |
+
|
32 |
+
|
33 |
# input the top-k number; a larger k increases search effectiveness but is more expensive
|
34 |
k = st.number_input('top-k most salient docs', min_value=1, max_value=20, value=3, on_change=clear_history)
|
35 |
|
|
|
47 |
f.write(bytes_data)
|
48 |
|
49 |
data = load_document(file_name)
|
50 |
+
chunks = chunk_data(data, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
|
51 |
st.write(f'Chunk size: {chunk_size}, Chunks: {len(chunks)}')
|
52 |
|
53 |
tokens, embedding_cost = calculate_embedding_cost(chunks)
|