import asyncio
import re
from typing import Dict, List, Optional, Tuple, Union

import httpx
from rapidfuzz import fuzz, process

# Normalized (prefix-stripped, lower-cased, trimmed) film title -> film title
# with only the "films/" prefix removed.
FilmIndex = Dict[str, str]

# Normalized series title -> {"original": <key as downloaded>,
#                             "episodes": [{"episode", "path", "season"}, ...]}.
SeriesIndex = Dict[str, Dict[str, Union[str, List[Dict[str, str]]]]]


class SmartSearch:
    """Fuzzy title search over a remotely hosted catalogue of films and TV series.

    The catalogue is fetched as two JSON documents (one per URL), normalized
    by ``load_data`` and then queried with RapidFuzz in ``search``.
    ``initialize`` must be awaited before the first ``search`` call.
    """

    def __init__(self, films_url: str, tv_series_url: str):
        """Store the catalogue URLs; no network access happens here.

        Args:
            films_url: URL of the JSON document listing films.
            tv_series_url: URL of the JSON document listing TV series.
        """
        self.films_url = films_url
        self.tv_series_url = tv_series_url
        # Populated by initialize(); None until then.
        self.films: Optional[FilmIndex] = None
        self.tv_series: Optional[SeriesIndex] = None
        self.index = None
        self.is_initialized = False

    async def download_and_save_data(
        self,
        url: str,
        max_retries: Optional[int] = None,
        retry_delay: float = 2.0,
    ) -> Dict:
        """Fetch ``url`` and return its decoded JSON body.

        Despite the name, nothing is written to disk; the parsed JSON is
        simply returned to the caller.

        Args:
            url: Address to GET.
            max_retries: Maximum number of retries after a read timeout.
                ``None`` (the default) retries indefinitely, which is the
                historical behaviour of this method.
            retry_delay: Seconds to wait between retries.

        Returns:
            The value produced by ``response.json()``.

        Raises:
            httpx.ReadTimeout: If ``max_retries`` is set and exhausted.
            Exception: Any other error (HTTP status error, connection
                failure, invalid JSON, ...) is logged and re-raised
                without retrying.
        """
        async with httpx.AsyncClient(timeout=30.0) as client:
            attempt = 0
            while True:
                try:
                    response = await client.get(url)
                    response.raise_for_status()
                    return response.json()
                except httpx.ReadTimeout:
                    # Only read timeouts are considered transient.
                    attempt += 1
                    if max_retries is not None and attempt > max_retries:
                        raise
                    print(f"Timeout occurred, retrying... (attempt {attempt})")
                    await asyncio.sleep(retry_delay)
                except Exception as e:
                    print(f"An error occurred: {e}")
                    raise

    async def load_data(self) -> Tuple[FilmIndex, SeriesIndex]:
        """Download both catalogues and normalize their titles.

        Films are iterated directly (keys if the payload is a mapping,
        items if it is a list); each entry has a leading ``films/`` removed.
        TV series are read as a mapping of series key -> list of episode
        records, each providing ``episode``, ``path`` and ``season``.

        Returns:
            A ``(films, tv_series)`` tuple in the shapes described by
            ``FilmIndex`` and ``SeriesIndex``.
        """
        films = await self.download_and_save_data(self.films_url)
        tv_series = await self.download_and_save_data(self.tv_series_url)

        films_normalized = {}
        for film in films:
            title = re.sub(r'^films/', '', film)
            films_normalized[title.lower().strip()] = title

        tv_series_normalized = {}
        for series, episodes in tv_series.items():
            series_normalized = re.sub(r'^tv/', '', series).lower().strip()
            episodes_normalized = [
                {
                    # Drops a leading "tv/<segment>/<segment>/" from the
                    # episode value (presumably series and season folders).
                    "episode": re.sub(r'^tv/.*?/.*?/', '', ep['episode']).strip(),
                    "path": ep['path'],
                    "season": ep['season'],
                }
                for ep in episodes
            ]
            tv_series_normalized[series_normalized] = {
                "original": series,
                "episodes": episodes_normalized,
            }

        return films_normalized, tv_series_normalized

    async def initialize(self):
        """Load the catalogue, build the search index and mark the instance ready."""
        self.films, self.tv_series = await self.load_data()
        self.index = self.create_index(self.films, self.tv_series)
        self.is_initialized = True

    async def update_data(self):
        """Refresh the catalogue by running ``initialize`` again."""
        await self.initialize()

    def create_index(
        self,
        films: FilmIndex,
        tv_series: SeriesIndex,
    ) -> Dict[str, Union[FilmIndex, SeriesIndex]]:
        """Bundle both normalized catalogues under the keys ``films`` and ``tv_series``."""
        return {
            'films': films,
            'tv_series': tv_series,
        }

    def search(self, query: str) -> Dict[str, List[str]]:
        """Return films and TV series whose titles fuzzily match ``query``.

        Matching uses ``fuzz.partial_ratio`` on the lower-cased, trimmed
        query: films need a score of at least 45, series titles at least 50,
        and a series is also included when any of its episode titles scores
        at least 80. Each series appears at most once in the result.

        NOTE: ``process.extract`` is called without ``limit``, so RapidFuzz's
        default result limit applies to the film and series title matches.

        Args:
            query: Free-text search string.

        Returns:
            ``{'films': [...], 'tv_series': [...]}`` where films are the
            prefix-stripped titles and series are the original series keys.

        Raises:
            RuntimeError: If called before ``initialize`` has completed.
        """
        if self.index is None:
            raise RuntimeError(
                "SmartSearch is not initialized; await initialize() before calling search()."
            )

        query = query.lower().strip()
        films = self.index['films']
        tv_series = self.index['tv_series']

        # Search films using RapidFuzz
        film_matches = process.extract(
            query, films.keys(), scorer=fuzz.partial_ratio, score_cutoff=45
        )
        film_results = [films[match[0]] for match in film_matches]

        # Search TV series titles using RapidFuzz
        series_matches = process.extract(
            query, tv_series.keys(), scorer=fuzz.partial_ratio, score_cutoff=50
        )
        series_results = [tv_series[match[0]]['original'] for match in series_matches]

        # Search TV series episodes: one call per series is enough, because
        # extractOne returns None exactly when no episode reaches the cutoff.
        for data in tv_series.values():
            episode_titles = [episode['episode'].lower() for episode in data['episodes']]
            best_episode = process.extractOne(
                query, episode_titles, scorer=fuzz.partial_ratio, score_cutoff=80
            )
            if best_episode is not None:
                series_results.append(data['original'])

        # A series can match by title and by episode; keep first occurrence only.
        return {
            'films': film_results,
            'tv_series': list(dict.fromkeys(series_results)),
        }