"""Helpers for building markdown links to Ukrainian court-registry decisions
and for extracting the text of a decision from a registry web page."""

import requests
from bs4 import BeautifulSoup
from typing import List, Union


def parse_doc_ids(doc_ids: Union[List[str], str, None]) -> List[str]:
    """Normalize decision document IDs into a flat list of ID strings.

    Accepts a list of IDs, a bracketed comma-separated string such as
    ``"[123, 456]"``, or ``None``.

    Returns:
        A list of ID strings; empty when the input is ``None``, empty,
        or of an unexpected type.
    """
    if doc_ids is None:
        return []
    if isinstance(doc_ids, list):
        # Strip stray brackets that sometimes wrap individual IDs.
        return [str(item).strip('[]') for item in doc_ids]
    if isinstance(doc_ids, str):
        cleaned = doc_ids.strip('[]').replace(' ', '')
        if cleaned:
            return [part.strip() for part in cleaned.split(',')]
        return []
    # BUG FIX: the original fell through here and implicitly returned None
    # for unexpected types (e.g. a bare int), violating the List[str]
    # annotation and breaking callers that iterate the result.
    return []


def get_links_html(doc_ids: Union[List[str], str, None]) -> str:
    """Return comma-separated markdown links to Supreme Court decisions.

    Returns an empty string when no IDs can be parsed from *doc_ids*.
    """
    parsed_ids = parse_doc_ids(doc_ids)
    if not parsed_ids:
        return ""
    links = [
        f"[Рішення ВС: {doc_id}](https://reyestr.court.gov.ua/Review/{doc_id})"
        for doc_id in parsed_ids
    ]
    return ", ".join(links)


def parse_lp_ids(lp_ids: Union[str, int, None]) -> List[str]:
    """Normalize a legal-position ID into a single-element list.

    Accepts a string (possibly bracket-wrapped) or an int; ``None`` or an
    empty/blank value yields an empty list. Note: unlike parse_doc_ids,
    this does NOT split on commas — the whole cleaned value is one ID.
    """
    if lp_ids is None:
        return []
    if isinstance(lp_ids, (str, int)):
        cleaned = str(lp_ids).strip('[]').replace(' ', '')
        if cleaned:
            return [cleaned]
    return []


def get_links_html_lp(lp_ids: Union[str, int, None]) -> str:
    """Return comma-separated markdown links to Supreme Court legal positions.

    Returns an empty string when no ID can be parsed from *lp_ids*.
    """
    parsed_ids = parse_lp_ids(lp_ids)
    if not parsed_ids:
        return ""
    links = [
        f"[ПП ВС: {lp_id}](https://lpd.court.gov.ua/home/search/{lp_id})"
        for lp_id in parsed_ids
    ]
    return ", ".join(links)


def extract_court_decision_text(url: str) -> str:
    """Download a court-decision page and return its paragraph text.

    Known registry boilerplate notices are filtered out. Paragraphs are
    joined with newlines and the result is stripped of surrounding
    whitespace.

    Raises:
        requests.RequestException: on network failure, timeout, or an
            HTTP error status.
    """
    # Timeout added so a hung registry server cannot block the caller
    # forever; raise_for_status prevents parsing an HTTP error page as
    # if it were a decision.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')
    unwanted_texts = [
        "Доступ до Реєстру здійснюється в тестовому (обмеженому) режимі.",
        "З метою упередження перешкоджанню стабільній роботі Реєстру",
    ]
    # Collect paragraphs and join once — the original's repeated `+=`
    # string concatenation is quadratic in the number of paragraphs.
    paragraphs = []
    for paragraph in soup.find_all('p'):
        text = paragraph.get_text(separator="\n").strip()
        if not any(unwanted in text for unwanted in unwanted_texts):
            paragraphs.append(text)
    return "\n".join(paragraphs).strip()