Zmorell committed on
Commit 331f4df · verified · 1 Parent(s): 8f4e2c8

Upload 2 files

Files changed (2)
  1. app.py +46 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,46 @@
+ import os
+ # Available backend options are: "jax", "torch", "tensorflow".
+ # The backend must be set before Keras is imported; "tensorflow" matches requirements.txt.
+ os.environ["KERAS_BACKEND"] = "tensorflow"
+
+ import re
+
+ import gradio as gr
+ import keras
+ import nltk
+ import spacy
+ import spacy.cli
+ from nltk.corpus import stopwords
+ from nltk.tokenize import word_tokenize
+
+ # Download the resources needed for tokenization, stopword removal, and lemmatization.
+ spacy.cli.download("en_core_web_sm")
+ nltk.download('punkt_tab')
+ nltk.download('stopwords')
+ stop_words = set(stopwords.words('english'))
+ nlp = spacy.load('en_core_web_sm')
+
+ # Load the trained classifier from the Hugging Face Hub.
+ model = keras.saving.load_model("hf://ARI-HIPA-AI-Team/keras_model")
+
+ def preprocess_text(text):
+     text = re.sub(r'[^a-zA-Z0-9\s]', '', text)  # Remove non-alphanumeric characters except spaces
+
+     # Tokenize and remove stopwords
+     tokens = word_tokenize(text.lower())
+     tokens = [word for word in tokens if word not in stop_words]
+
+     # Lemmatize
+     doc = nlp(' '.join(tokens))
+     lemmas = [token.lemma_ for token in doc]
+     return ' '.join(lemmas)
+
+ def predict(text):
+     inputs = preprocess_text(text)
+     # NOTE: assumes the saved model accepts a batch of preprocessed strings.
+     outputs = model.predict([inputs])
+     # Convert the model output to a string before concatenating.
+     return "This text is a violation = " + str(outputs[0])
+
+ demo = gr.Interface(fn=predict, inputs="text", outputs="text")
+ demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ huggingface_hub==0.25.2
+ transformers
+ tensorflow
+ nltk
+ spacy
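
Side note (not part of the uploaded files): a minimal sketch of how the preprocessing path in app.py could be exercised locally, assuming nltk and spacy plus their data downloads are available as in requirements.txt; sample_text and the output in the final comment are illustrative only.

import re
import nltk
import spacy
import spacy.cli
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Fetch the same resources app.py downloads at startup.
nltk.download('punkt_tab')
nltk.download('stopwords')
spacy.cli.download("en_core_web_sm")
nlp = spacy.load('en_core_web_sm')
stop_words = set(stopwords.words('english'))

sample_text = "The users' posts were flagged for review!"  # illustrative input
cleaned = re.sub(r'[^a-zA-Z0-9\s]', '', sample_text)
tokens = [w for w in word_tokenize(cleaned.lower()) if w not in stop_words]
print(' '.join(tok.lemma_ for tok in nlp(' '.join(tokens))))
# Expected output is roughly: "user post flag review"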