Update README.md
Browse files
README.md
CHANGED
@@ -151,6 +151,21 @@ print(block_rankings)
|
|
151 |
|
152 |
# [1, 0]
|
153 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
154 |
max_context_window = 32
|
155 |
pruned_html = gen_embed_pruner.prune_HTML(pruned_html, block_tree, block_rankings, chat_tokenizer, max_context_window)
|
156 |
print(pruned_html)
|
|
|
151 |
|
152 |
# [1, 0]
|
153 |
|
154 |
+
block_tree, pruned_html=build_block_tree(pruned_html, max_node_words=10)
|
155 |
+
for block in block_tree:
|
156 |
+
print("Block Content: ", block[0])
|
157 |
+
print("Block Path: ", block[1])
|
158 |
+
print("Is Leaf: ", block[2])
|
159 |
+
print("")
|
160 |
+
|
161 |
+
# Block Content: <title>When was the bellagio in las vegas built?</title>
|
162 |
+
# Block Path: ['html', 'title']
|
163 |
+
# Is Leaf: True
|
164 |
+
#
|
165 |
+
# Block Content: <p>The Bellagio is a luxury hotel and casino located on the Las Vegas Strip in Paradise, Nevada. It was built in 1998.</p>
|
166 |
+
# Block Path: ['html', 'p']
|
167 |
+
# Is Leaf: True
|
168 |
+
|
169 |
max_context_window = 32
|
170 |
pruned_html = gen_embed_pruner.prune_HTML(pruned_html, block_tree, block_rankings, chat_tokenizer, max_context_window)
|
171 |
print(pruned_html)
|