Frenchizer committed
Commit 4235ba5 · verified · 1 Parent(s): f9babd0

Update app.py

Files changed (1):
  1. app.py +14 -17
app.py CHANGED
@@ -1,13 +1,12 @@
 import gradio as gr
 from transformers import pipeline
 import spacy
-import language_tool_python
+from textblob import TextBlob
 import json
 import requests
 
-# Initialize models and tools
-nlp = spacy.load("en_core_web_sm")
-language_tool = language_tool_python.LanguageTool('en-US')
+# Initialize models
+nlp = spacy.load("en_core_web_sm")  # Use "en_core_web_trf" if more accuracy is needed
 spell_checker = pipeline("text2text-generation", model="oliverguhr/spelling-correction-english-base")
 
 def preprocess_and_forward(text: str) -> str:
@@ -43,24 +42,22 @@ def preprocess_and_forward(text: str) -> str:
 
 def preprocess_text(text: str):
     result = {
-        "corrections": [],
+        "spell_suggestions": [],
         "entities": [],
-        "tags": [],
-        "spell_suggestions": []
+        "tags": []
     }
 
-    # Spell checking
-    matches = language_tool.check(text)
-    for match in matches:
-        if match.replacements:
-            result["corrections"].append({
-                "original": match.context[match.offsetInContext:match.offsetInContext + match.errorLength],
-                "suggestion": match.replacements[0]
-            })
+    # Basic spell checking using TextBlob
+    corrected_text = str(TextBlob(text).correct())
+    if corrected_text != text:
+        result["spell_suggestions"].append({
+            "original": text,
+            "corrected": corrected_text
+        })
 
     # Transformer-based spell check
     spell_checked = spell_checker(text, max_length=512)[0]['generated_text']
-    if spell_checked != text:
+    if spell_checked != text and spell_checked != corrected_text:
         result["spell_suggestions"].append({
             "original": text,
             "corrected": spell_checked
@@ -70,7 +67,7 @@ def preprocess_text(text: str):
     doc = nlp(text)
     result["entities"] = [{"text": ent.text, "label": ent.label_} for ent in doc.ents]
 
-    # Extract potential tags
+    # Extract potential tags (hashtags, mentions, etc.)
     result["tags"] = [token.text for token in doc if token.text.startswith(('#', '@'))]
 
     return text, result
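
For quick local verification, a minimal standalone sketch of the flow this commit introduces. It is illustrative only: the demo() wrapper, the sample sentence, and the __main__ harness are not part of app.py, and it assumes the textblob package and the spaCy en_core_web_sm model are installed alongside transformers.

    from textblob import TextBlob
    from transformers import pipeline
    import spacy

    nlp = spacy.load("en_core_web_sm")
    spell_checker = pipeline("text2text-generation", model="oliverguhr/spelling-correction-english-base")

    def demo(text: str) -> dict:
        suggestions = []
        # Pass 1: cheap dictionary-based correction with TextBlob
        corrected = str(TextBlob(text).correct())
        if corrected != text:
            suggestions.append({"original": text, "corrected": corrected})
        # Pass 2: transformer correction, only kept when it differs
        # from both the input and the TextBlob output
        checked = spell_checker(text, max_length=512)[0]["generated_text"]
        if checked != text and checked != corrected:
            suggestions.append({"original": text, "corrected": checked})
        doc = nlp(text)
        return {
            "spell_suggestions": suggestions,
            "entities": [{"text": ent.text, "label": ent.label_} for ent in doc.ents],
            # Mirrors the token-prefix filter in app.py; note that spaCy may
            # tokenize '#' separately from the word that follows it.
            "tags": [t.text for t in doc if t.text.startswith(("#", "@"))],
        }

    if __name__ == "__main__":
        print(demo("I havv a #typo in this sentense, @reviewer"))

The ordering keeps the cheaper TextBlob suggestion first and records the transformer output only when it adds something new, which is why the second condition also compares against the TextBlob result.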