curtpond jnick committed on
Commit
07b8953
1 Parent(s): d34dca7

Updated app.py with NER model (#5)

Browse files

- Updated app.py with NER model (44d4cf46f88ff766b60b476bf4e750c7b49aeb1b)


Co-authored-by: Julia <[email protected]>

Files changed (1) hide show
  1. app.py +30 -6
app.py CHANGED
@@ -9,6 +9,8 @@ from nltk.corpus import stopwords
9
  nltk.download('stopwords')
10
  from sklearn.feature_extraction.text import CountVectorizer
11
  from sklearn.feature_extraction.text import TfidfVectorizer
 
 
12
 
13
  # file name
14
  #lr_filename = 'lr_021223.pkl'
@@ -55,13 +57,35 @@ Prediction function
55
  #return prediction
56
  '''
57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  # Define interface
59
- demo = gr.Interface(fn=predict,
60
- title="Text Classification Demo",
61
- description="This is a demo of a text classification model using Logistic Regression.",
62
- inputs=gr.Textbox(lines=10, placeholder='Input text here...', label="Input Text"),
63
- outputs=gr.Textbox(label="Predicted Label: Other: 0, Healthcare: 1, Technology: 2", lines=2, placeholder='Predicted label will appear here...'),
64
- allow_flagging='never'
 
 
 
 
65
  )
66
 
67
  demo.launch()
 
9
  nltk.download('stopwords')
10
  from sklearn.feature_extraction.text import CountVectorizer
11
  from sklearn.feature_extraction.text import TfidfVectorizer
12
+ from flair.data import Sentence
13
+ from flair.models import SequenceTagger
14
 
15
  # file name
16
  #lr_filename = 'lr_021223.pkl'
 
57
  #return prediction
58
  '''
59
 
60
# Specify NER model.
# The checkpoint location can be overridden through the NER_MODEL_PATH
# environment variable so the app is not tied to a single machine's directory
# layout; when the variable is unset the original hard-coded path is used.
import os  # kept next to its only use; needed solely for the override lookup

tagger = SequenceTagger.load(
    os.environ.get('NER_MODEL_PATH', '/Users/julia/Models/ner_04_results/best-model.pt')
)  # e.g. NER_MODEL_PATH=best-model.pt when the checkpoint sits beside app.py
62
+
63
+ # Runs NER on input text
64
def run_ner(input_text):
    """Run the NER tagger on ``input_text`` and collect the predicted spans.

    Args:
        input_text: Raw text to tag. Missing, empty or whitespace-only input
            is accepted and yields no entities.

    Returns:
        A dict with two keys: ``"text"`` (the input text, or ``""`` when the
        input was ``None``) and ``"entities"`` (a list with one dict per
        predicted span, holding the span's ``'entity'`` label value, its
        ``'word'`` text and its ``'start'`` / ``'end'`` positions).
    """
    # Nothing to tag: skip building a Sentence and calling the model at all.
    if not input_text or not input_text.strip():
        return {"text": input_text or "", "entities": []}

    sentence = Sentence(input_text)
    # The predicted spans are read back from `sentence` below.
    tagger.predict(sentence)
    entities = [
        {
            'entity': span.get_label('ner').value,
            'word': span.text,
            'start': span.start_position,
            'end': span.end_position,
        }
        for span in sentence.get_spans('ner')
    ]
    return {"text": input_text, "entities": entities}
71
+
72
+ # Run both models, and return a tuple of their results
73
def run_models(input_text):
    """Run both models on ``input_text``.

    Returns:
        A ``(prediction, entities)`` tuple: the classification label followed
        by the result of ``run_ner``.
    """
    ner_result = run_ner(input_text)
    # This "0" is a placeholder to avoid errors; once the LR model is working,
    # use this instead: label = predict(input_text)
    label = 0
    return label, ner_result
77
+
78
  # Define interface
79
def _build_demo():
    """Assemble the Gradio interface that runs both models on the input text."""
    # Components are created in the same order as they are listed in the UI:
    # the input box first, then the two output widgets.
    text_input = gr.Textbox(lines=10, placeholder='Input text here...', label="Input Text")
    label_output = gr.Textbox(
        label="Predicted Classification Label: Other: 0, Healthcare: 1, Technology: 2",
        lines=2,
        placeholder='Predicted label will appear here...',
    )
    ner_output = gr.HighlightedText(label='Named Entity Recognition Results')

    # These examples are just placeholders; once the LR model is working, we can
    # use longer example text such as paragraphs
    sample_texts = [
        'The indictments were announced Tuesday by the Justice Department in Cairo.',
        "In 2019, the men's singles winner was Novak Djokovic who defeated Roger Federer in a tournament taking place in the United Kingdom.",
        'In a study published by the American Heart Association on January 18, researchers at the Johns Hopkins School of Medicine found that meal timing did not impact weight.',
    ]

    return gr.Interface(
        fn=run_models,
        title="Text Classification & Named Entity Recognition Demo",
        description="This is a demo of a text classification model using logistic regression as well as a named entity recognition model. Enter in some text or use one of the provided examples. Note that common named entity recognition tags include **geo** (geographical entity), **org** (organization), **per** (person), and **tim** (time).",
        article='*This demo is based on Logistic Regression and Named Entity Recognition models trained by Curtis Pond and Julia Nickerson as part of their FourthBrain capstone project. For more information, check out their [GitHub repo](https://github.com/nickersonj/glg-capstone).*',
        inputs=text_input,
        outputs=[label_output, ner_output],
        examples=sample_texts,
        allow_flagging='never',
    )


# Define the interface once at import time, then start serving it.
demo = _build_demo()

demo.launch()