File size: 1,145 Bytes
6f6fe54
1fbe29c
 
 
 
 
 
 
 
 
 
d13a069
1fbe29c
 
 
 
d13a069
1fbe29c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ba566f6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import gradio as gr
from tensorflow import keras
import pandas as pd
import tensorflow as tf
import nltk
import spacy
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Fetch the NLTK data this module relies on before anything reads it.
for _resource in ('punkt_tab', 'stopwords'):
    nltk.download(_resource)

# English stopword list as a set, plus spaCy's small English pipeline
# (used for lemmatisation in preprocess_text).
stop_words = set(stopwords.words('english'))
nlp = spacy.load('en_core_web_sm')

# The Keras model is loaded once at import time.
# NOTE(review): the path looks like a placeholder -- confirm it points at the
# real saved model before deploying.
model = keras.models.load_model("path_to_your_model/my_keras_model")

def preprocess_text(text):
    """Normalise raw text into a space-separated string of lemmas.

    Steps, in order:
      1. Delete every character that is not an ASCII letter, a digit or
         whitespace.
      2. Lower-case the result and split it with NLTK's ``word_tokenize``.
      3. Discard tokens found in the module-level ``stop_words`` set.
      4. Run the remaining words through the spaCy pipeline ``nlp`` and keep
         each token's lemma.

    Args:
        text: The input string to clean.

    Returns:
        The lemmas joined by single spaces.
    """
    cleaned = re.sub(r'[^a-zA-Z0-9\s]', '', text)

    # Stopword filtering happens after lower-casing, on the NLTK tokens.
    kept_words = (
        candidate
        for candidate in word_tokenize(cleaned.lower())
        if candidate not in stop_words
    )

    # spaCy re-tokenises the joined string; emit one lemma per spaCy token.
    parsed = nlp(' '.join(kept_words))
    return ' '.join(piece.lemma_ for piece in parsed)

def predict(text):
    """Run the loaded model on *text* and return a display string.

    The text is normalised with ``preprocess_text`` and passed to the
    module-level ``model``. The model output is rendered after the fixed
    prefix ``"This text is a violation = "``.

    Args:
        text: The raw string to classify (the Gradio interface supplies the
            textbox contents here).

    Returns:
        The prefix followed by the model output rendered as text.
    """
    inputs = preprocess_text(text)
    # NOTE(review): the model receives the preprocessed string as-is. That
    # presumably requires the saved model to embed its own text
    # vectorisation -- confirm, otherwise tokenise/pad here before the call.
    outputs = model(inputs)

    # Concatenating ``str + outputs`` fails when the model returns a numeric
    # tensor or array rather than a str. Unwrap to plain Python values first
    # (tensor -> ndarray -> list/scalar); a str output passes through as-is.
    for unwrap_name in ("numpy", "tolist"):
        unwrap = getattr(outputs, unwrap_name, None)
        if callable(unwrap):
            outputs = unwrap()
    return f"This text is a violation = {outputs}"

# Build a text-in / text-out Gradio interface around predict() and start it.
demo = gr.Interface(
    fn=predict,
    inputs="text",
    outputs="text",
)
demo.launch()