Update app.py
app.py CHANGED
@@ -6,9 +6,11 @@ import spacy
 import re
 from nltk.corpus import stopwords
 from nltk.tokenize import word_tokenize
-from tensorflow.keras.preprocessing.text import Tokenizer
 from tensorflow.keras.preprocessing.sequence import pad_sequences
+import requests
+import pickle
 
+# Download necessary resources
 import spacy.cli
 spacy.cli.download("en_core_web_sm")
 nltk.download('punkt_tab')
@@ -17,7 +19,6 @@ stop_words = set(stopwords.words('english'))
 nlp = spacy.load('en_core_web_sm')
 
 # Download the model file from Hugging Face
-import requests
 model_url = "https://huggingface.co/Zmorell/HIPA_2/resolve/main/saved_keras_model.keras"
 local_model_path = "saved_keras_model.keras"
 
@@ -31,12 +32,17 @@ print(f"Model downloaded to {local_model_path}")
 model = tf.keras.models.load_model(local_model_path)
 print(f"Model loaded from {local_model_path}")
 
+# Load the tokenizer
+tokenizer_file_path = "tokenizer.pickle"
+with open(tokenizer_file_path, 'rb') as handle:
+    tokenizer = pickle.load(handle)
+
+print("Tokenizer loaded from tokenizer.pickle")
+
 def preprocess_text(text):
-    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
-    # Tokenize and remove stopwords
+    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
     tokens = word_tokenize(text.lower())
     tokens = [word for word in tokens if word not in stop_words]
-    # Lemmatize
     doc = nlp(' '.join(tokens))
     lemmas = [token.lemma_ for token in doc]
     return ' '.join(lemmas)
@@ -47,7 +53,6 @@ def predict(text):
     inputs = preprocess_text(text)
     print(f"Preprocessed text: {inputs}")
 
-    # Ensure the input shape matches what the model expects
     inputs = tokenizer.texts_to_sequences([inputs])
     print(f"Tokenized text: {inputs}")
 
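For context, a minimal sketch of how the pieces introduced in this commit (the pickled Keras tokenizer and the pad_sequences import) would typically come together before calling the model. The maxlen value of 100 and the final model.predict() call are assumptions for illustration; they are not part of this diff.

# Sketch only, not the app's actual predict path: load the artifacts this commit
# relies on and run one text through tokenization, padding, and the model.
# maxlen=100 and model.predict() are assumed here; the diff does not show them.
import pickle
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences

with open("tokenizer.pickle", "rb") as handle:
    tokenizer = pickle.load(handle)          # Keras Tokenizer fitted at training time

model = tf.keras.models.load_model("saved_keras_model.keras")

def predict(text):
    sequences = tokenizer.texts_to_sequences([text])   # words -> integer ids
    padded = pad_sequences(sequences, maxlen=100)       # assumed training sequence length
    return model.predict(padded)                        # e.g. shape (1, num_classes)

If the model was trained on fixed-length sequences, the maxlen passed to pad_sequences should match the length used at training time.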