Zmorell committed · verified
Commit 3e3fdaa · Parent: b094da2

Update app.py

Files changed (1): app.py (+11 -6)
app.py CHANGED
@@ -6,9 +6,11 @@ import spacy
 import re
 from nltk.corpus import stopwords
 from nltk.tokenize import word_tokenize
-from tensorflow.keras.preprocessing.text import Tokenizer
 from tensorflow.keras.preprocessing.sequence import pad_sequences
+import requests
+import pickle
 
+# Download necessary resources
 import spacy.cli
 spacy.cli.download("en_core_web_sm")
 nltk.download('punkt_tab')
@@ -17,7 +19,6 @@ stop_words = set(stopwords.words('english'))
 nlp = spacy.load('en_core_web_sm')
 
 # Download the model file from Hugging Face
-import requests
 model_url = "https://huggingface.co/Zmorell/HIPA_2/resolve/main/saved_keras_model.keras"
 local_model_path = "saved_keras_model.keras"
 
@@ -31,12 +32,17 @@ print(f"Model downloaded to {local_model_path}")
 model = tf.keras.models.load_model(local_model_path)
 print(f"Model loaded from {local_model_path}")
 
+# Load the tokenizer
+tokenizer_file_path = "tokenizer.pickle"
+with open(tokenizer_file_path, 'rb') as handle:
+    tokenizer = pickle.load(handle)
+
+print("Tokenizer loaded from tokenizer.pickle")
+
 def preprocess_text(text):
-    text = re.sub(r'[^a-zA-Z0-9\s]', '', text) # Only remove non-alphanumeric characters except spaces
-    # Tokenize and remove stopwords
+    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
     tokens = word_tokenize(text.lower())
     tokens = [word for word in tokens if word not in stop_words]
-    # Lemmatize
     doc = nlp(' '.join(tokens))
     lemmas = [token.lemma_ for token in doc]
     return ' '.join(lemmas)
@@ -47,7 +53,6 @@ def predict(text):
     inputs = preprocess_text(text)
     print(f"Preprocessed text: {inputs}")
 
-    # Ensure the input shape matches what the model expects
    inputs = tokenizer.texts_to_sequences([inputs])
     print(f"Tokenized text: {inputs}")
 
 
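The download code itself sits between the second and third hunks and is unchanged, so the diff elides it; only its closing print(f"Model downloaded to {local_model_path}") is visible in a hunk header. For context, a minimal sketch of what a requests-based download of the .keras file typically looks like (the actual body in app.py may differ):

import requests

model_url = "https://huggingface.co/Zmorell/HIPA_2/resolve/main/saved_keras_model.keras"
local_model_path = "saved_keras_model.keras"

# Stream to disk so the weights file never has to fit in memory at once.
response = requests.get(model_url, stream=True)
response.raise_for_status()
with open(local_model_path, "wb") as f:
    for chunk in response.iter_content(chunk_size=8192):
        f.write(chunk)

print(f"Model downloaded to {local_model_path}")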
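The last hunk cuts off after texts_to_sequences, but the pad_sequences import kept at the top of app.py indicates the sequence is padded before reaching the model. A minimal sketch of the likely remainder of predict, where maxlen=100 is an assumed value that must match the sequence length the model was trained with:

from tensorflow.keras.preprocessing.sequence import pad_sequences

def predict(text):
    inputs = preprocess_text(text)
    print(f"Preprocessed text: {inputs}")

    inputs = tokenizer.texts_to_sequences([inputs])
    print(f"Tokenized text: {inputs}")

    # Pad to the fixed input length the network expects (assumed 100).
    padded = pad_sequences(inputs, maxlen=100, padding='post')
    return model.predict(padded)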