Spaces:

kritsadaK
/

ThaiSentenceSimilarityApp

Sleeping

App Files Community

kritsadaK committed on Oct 29

Commit

4b84b24

•

1 Parent(s): 16ab143

Initial commit

Browse files

Files changed (1) hide show

app.py +2 -6

app.py CHANGED Viewed

@@ -14,12 +14,10 @@ warnings.filterwarnings("ignore", category=UserWarning, module="torchvision")
 try:
     tokenizer = AutoTokenizer.from_pretrained("airesearch/wangchanberta-base-att-spm-uncased", use_fast=False)
     model = AutoModelForMaskedLM.from_pretrained("airesearch/wangchanberta-base-att-spm-uncased")
-    model_name = "airesearch/wangchanberta-base-att-spm-uncased"
 except Exception:
     st.warning("Switching to xlm-roberta-base model due to compatibility issues.")
-    tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")
     model = AutoModelForMaskedLM.from_pretrained("xlm-roberta-base")
-    model_name = "xlm-roberta-base"
 # Initialize the fill-mask pipeline
 pipe = pipeline("fill-mask", model=model, tokenizer=tokenizer, framework="pt")
@@ -68,7 +66,7 @@ Feel free to enter your own sentence with `<mask>` and explore the predictions!
 # User input box
 st.subheader("Input Text")
-input_text = st.text_input("Enter a sentence with `<mask>` to find similar predictions:", "ผู้ใช้งานท่าอากาศยานนานาชาติ <mask> มีกว่าสามล้านคน")
 # Ensure the input includes a `<mask>`
 if "<mask>" not in input_text:
@@ -90,10 +88,8 @@ if input_text:
         result = pipe(input_text)
         for r in result:
-            # Adjust based on observed output structure
             prediction_text = r.get('sequence', '')
-            # Only proceed if we have a valid prediction text
             if prediction_text:
                 prediction_embedding = get_embedding(prediction_text)
                 similarity = cosine_similarity(input_embedding, prediction_embedding)[0][0]

 try:
     tokenizer = AutoTokenizer.from_pretrained("airesearch/wangchanberta-base-att-spm-uncased", use_fast=False)
     model = AutoModelForMaskedLM.from_pretrained("airesearch/wangchanberta-base-att-spm-uncased")
 except Exception:
     st.warning("Switching to xlm-roberta-base model due to compatibility issues.")
+    tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base", use_fast=False)
     model = AutoModelForMaskedLM.from_pretrained("xlm-roberta-base")
 # Initialize the fill-mask pipeline
 pipe = pipeline("fill-mask", model=model, tokenizer=tokenizer, framework="pt")
 # User input box
 st.subheader("Input Text")
+input_text = st.text_input("Enter a sentence with `<mask>` to find similar predictions:", "นักท่องเที่ยวจำนวนมากเลือกที่จะไปเยือน <mask> เพื่อสัมผัสธรรมชาติ")
 # Ensure the input includes a `<mask>`
 if "<mask>" not in input_text:
         result = pipe(input_text)
         for r in result:
             prediction_text = r.get('sequence', '')
             if prediction_text:
                 prediction_embedding = get_embedding(prediction_text)
                 similarity = cosine_similarity(input_embedding, prediction_embedding)[0][0]