import gradio as gr import re from collections import defaultdict def create_inverted_index(dataframe): """ Create an inverted index from the Quran dataset for efficient word searching. Args: dataframe (pd.DataFrame): The Quran dataset with Text column Returns: defaultdict: A dictionary mapping words to their locations (surah, ayah) """ inverted_index = defaultdict(list) for index, row in dataframe.iterrows(): words = row['Text'].split() for word in words: word = re.sub(r'\W+', '', word).lower() if word: inverted_index[word].append((row['Surah'], row['Ayah'])) return inverted_index def search_quran(keyword, quran_data, inverted_index): """ Search for a keyword in the Quran and return formatted results. Args: keyword (str): The word to search for quran_data (pd.DataFrame): The Quran dataset inverted_index (defaultdict): The inverted index for word searching Returns: str: HTML formatted search results """ results = [] if not keyword or keyword.lower() not in inverted_index: return f"No occurrences found for '{keyword}'" verse_ids = inverted_index[keyword.lower()] for verse_id in verse_ids: surah_num, verse_num = verse_id verse_data = quran_data[(quran_data['Surah'] == surah_num) & (quran_data['Ayah'] == verse_num)] surah_name = verse_data['Surah Name'].iloc[0] verse_text = verse_data['Text'].iloc[0] highlighted_text = re.sub( f"(?i)({keyword})", r'\1', verse_text ) results.append( f"