Spaces:

DocSA
/

Legal_Position_hybrid_search_without_AI

Runtime error

Add basic BM25 search and corpus generation

920001b 3 months ago

890 Bytes

	from pathlib import Path

	from llama_index.retrievers.bm25 import BM25Retriever
	from main import extract_court_decision_text


	# Base directory of the persisted indexes; it is joined with INDEX_NAME below
	# to form the directory the BM25 retriever is loaded from.
	PERSIST_PATH = Path("Save_Index_Local")

	INDEX_NAME = "bm25_retriever"
	# Alternative index name, kept for manual switching:
	# INDEX_NAME = "bm25_retriever_meta"

	# URL handed to extract_court_decision_text(); the text it returns is used
	# as the retrieval query.
	TEST_CD_URL = "https://reyestr.court.gov.ua/Review/118766467"
	# Alternative test URL, kept for manual switching:
	# TEST_CD_URL = "https://reyestr.court.gov.ua/Review/118763429"

	# When True, the extracted text is printed before the search results.
	PRINT_CD = False


	# Load the BM25 retriever that was previously persisted to disk.
	retriever = BM25Retriever.from_persist_dir(str(PERSIST_PATH / INDEX_NAME))

	court_decision_text = extract_court_decision_text(TEST_CD_URL)

	if PRINT_CD:
	    print(court_decision_text, "\n\n\n\n\n")

	# The whole extracted text is passed to the retriever as a single query.
	nodes_with_score = retriever.retrieve(court_decision_text)

	# Print one tab-separated entry per hit: 1-based rank, score with four
	# decimals, and the node's "title" metadata (falling back to a Ukrainian
	# "unknown title" placeholder when the key is absent).
	for index, node_with_score in enumerate(nodes_with_score, start=1):
	    source_title = node_with_score.node.metadata.get("title", "Невідомий заголовок")
	    print(index, f"{node_with_score.score:.4f}", source_title, "\n", sep="\t")