import spacy
import pickle
from nltk.corpus import wordnet


def load_spacy_values(model="en_core_web_md", filepath_docs_spacy='dict_spacy_object.pkl'):
    '''
    Loads a spaCy language model and a dictionary of spaCy Doc objects from a pickle file.

    Parameters
    ----------
    model : str
        The name or local path of the spaCy model to load for processing text,
        for example "en_core_web_sm" or a custom model path.
    filepath_docs_spacy : str
        The path to the pickle file containing a dictionary whose keys are tokens
        (strings) and whose values are the corresponding serialized spaCy Doc objects.

    Returns
    -------
    nlp : spacy.language.Language
        The loaded spaCy language model.
    dict_docs_spacy : dict
        A dictionary whose keys are tokens (strings) and whose values are spaCy Doc
        objects reconstructed from the serialized bytes stored in the pickle file.
    '''
    # ---- Load the spaCy NLP model
    #
    nlp = spacy.load(model)

    # ---- Load the pickle file and reconstruct the dictionary with tokens as keys
    #      and spaCy Doc objects as values
    #
    with open(filepath_docs_spacy, 'rb') as file:
        dict_docs_spacy_bytes = pickle.load(file)

    dict_docs_spacy = {
        key: spacy.tokens.Doc(nlp.vocab).from_bytes(doc_bytes)
        for key, doc_bytes in dict_docs_spacy_bytes.items()
    }

    return nlp, dict_docs_spacy


def find_antonyms(word):
    '''
    Generates the set of all antonyms of a given word.

    Parameters
    ----------
    word : str
        The word whose antonyms we want to find.

    Returns
    -------
    antonyms : set of str
        The set of all antonyms detected using NLTK and WordNet.
    '''
    antonyms = set()

    # ---- Collect every synset of the word recorded in WordNet
    #
    syn_set = wordnet.synsets(word)

    # ---- Loop over each synset
    #
    for syn in syn_set:

        # ---- Loop over each lemma of the synset
        #
        for lemma in syn.lemmas():

            # ---- Add the lemma's antonyms to the antonym set
            #
            if lemma.antonyms():
                antonyms.add(lemma.antonyms()[0].name())

    return antonyms


def find_synonyms(word, model, dict_embedding, list_2000_tokens):
    '''
    Maps a word onto the most similar token from a fixed token list, excluding
    antonyms and requiring the same part of speech.

    Parameters
    ----------
    word : str
        The word to map onto the token list.
    model : spacy.language.Language
        The loaded spaCy language model used for POS tagging and embeddings.
    dict_embedding : dict
        A dictionary whose keys are tokens (strings) and whose values are the
        corresponding spaCy Doc objects.
    list_2000_tokens : list of str
        The list of candidate tokens to map the word onto.

    Returns
    -------
    str
        The most similar token from the list, or the word itself when it is a
        proper noun, a basic verb, already in the list, or has no candidate.
    '''
    # ---- Preserve proper nouns as-is
    #
    doc = model(word)
    if doc[0].pos_ == "PROPN":
        return word

    # ---- Map basic verb forms to canonical placeholders
    #
    basic_verbs = {
        "is": "IS", "am": "IS", "are": "IS", "was": "IS", "were": "IS", "be": "IS",
        "have": "HAVE", "has": "HAVE", "had": "HAVE"
    }
    if word.lower() in basic_verbs:
        return basic_verbs[word.lower()]

    # ---- Return words that are already in the list unchanged
    #
    if word in list_2000_tokens:
        return word

    # ---- Keep only candidates with the same part of speech, excluding antonyms
    #
    word_pos = doc[0].pos_
    antonyms = find_antonyms(word)
    filtered_tokens = [
        token for token in list_2000_tokens
        if token not in antonyms and model(token)[0].pos_ == word_pos
    ]

    # ---- Score each candidate by embedding similarity to the word, skipping
    #      tokens that have no precomputed embedding in the dictionary
    #
    similarities = []
    for token in filtered_tokens:
        token_doc = dict_embedding.get(token)
        if token_doc is not None:
            similarities.append((token, token_doc.similarity(doc)))

    # ---- If no candidate survived the filters, return the original word unchanged
    #
    if not similarities:
        return word

    most_similar_token = max(similarities, key=lambda item: item[1])[0]
    return most_similar_token
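

# ---- Usage sketch (illustrative, not part of the original module). It assumes
#      that 'dict_spacy_object.pkl' exists on disk and that the pickled
#      dictionary's keys can double as the candidate token list; both are
#      assumptions for demonstration, not guarantees made by the code above.
#
if __name__ == "__main__":
    nlp, dict_docs_spacy = load_spacy_values()

    # Treat the pickled dictionary's keys as the candidate vocabulary.
    list_2000_tokens = list(dict_docs_spacy.keys())

    # Map an out-of-vocabulary word onto its closest in-vocabulary counterpart.
    print(find_synonyms("joyful", nlp, dict_docs_spacy, list_2000_tokens))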