import requests
from bs4 import BeautifulSoup
from typing import List, Union
def parse_doc_ids(doc_ids: Union[List[str], str, None]) -> List[str]:
    """Normalize court decision IDs (list, bracketed string, or None) into a flat list of strings."""
    if doc_ids is None:
        return []
    if isinstance(doc_ids, list):
        return [str(doc_id).strip('[]') for doc_id in doc_ids]
    if isinstance(doc_ids, str):
        # Strip surrounding brackets and spaces, then split "1234,5678"-style strings.
        cleaned = doc_ids.strip('[]').replace(' ', '')
        if cleaned:
            return [doc_id.strip() for doc_id in cleaned.split(',')]
    return []
def get_links_html(doc_ids: Union[List[str], str, None]) -> str:
    """Build Markdown links to Supreme Court decisions in the court register.

    The link label "Рішення ВС" means "Supreme Court decision".
    """
    parsed_ids = parse_doc_ids(doc_ids)
    if not parsed_ids:
        return ""
    links = [f"[Рішення ВС: {doc_id}](https://reyestr.court.gov.ua/Review/{doc_id})"
             for doc_id in parsed_ids]
    return ", ".join(links)
def parse_lp_ids(lp_ids: Union[str, int, None]) -> List[str]:
    """Normalize a single legal position ID into a one-element list of strings."""
    if lp_ids is None:
        return []
    if isinstance(lp_ids, (str, int)):
        cleaned = str(lp_ids).strip('[]').replace(' ', '')
        if cleaned:
            return [cleaned]
    return []
def get_links_html_lp(lp_ids: Union[str, int, None]) -> str:
    """Build Markdown links to Supreme Court legal positions.

    The link label "ПП ВС" means "Supreme Court legal position".
    """
    parsed_ids = parse_lp_ids(lp_ids)
    if not parsed_ids:
        return ""
    links = [f"[ПП ВС: {lp_id}](https://lpd.court.gov.ua/home/search/{lp_id})"
             for lp_id in parsed_ids]
    return ", ".join(links)
def extract_court_decision_text(url: str) -> str:
    """Download a court decision page and return its paragraph text, skipping register service notices."""
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')
    # Service banners shown by the register: "Access to the Register is provided in
    # test (limited) mode." and "In order to prevent interference with the stable
    # operation of the Register ...". Paragraphs containing them are dropped.
    unwanted_texts = [
        "Доступ до Реєстру здійснюється в тестовому (обмеженому) режимі.",
        "З метою упередження перешкоджанню стабільній роботі Реєстру"
    ]
    decision_text = ""
    for paragraph in soup.find_all('p'):
        text = paragraph.get_text(separator="\n").strip()
        if not any(unwanted_text in text for unwanted_text in unwanted_texts):
            decision_text += text + "\n"
    return decision_text.strip()
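

# Minimal usage sketch, not part of the original module: it shows how the link
# builders and the scraper are meant to be combined. The document ID 12345678 is a
# hypothetical placeholder, not a real register entry.
if __name__ == "__main__":
    # A bracketed string, a list, or None are all accepted by parse_doc_ids.
    print(get_links_html("[12345678, 87654321]"))
    print(get_links_html_lp(12345678))

    # Fetch the full text of one decision page from the register.
    example_url = "https://reyestr.court.gov.ua/Review/12345678"  # hypothetical ID
    print(extract_court_decision_text(example_url)[:500])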