Pavankalyan committed on
Commit
054f6fb
·
1 Parent(s): f73cf1b

Upload 2 files

Browse files
Files changed (2) hide show
  1. gingerit.py +58 -0
  2. output_beautify.py +36 -0
gingerit.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ import requests
3
+ import cloudscraper
4
# Endpoint that GingerIt.parse sends its GET request to (stored as GingerIt.url).
+ URL = "https://services.gingersoftware.com/Ginger/correct/jsonSecured/GingerTheTextFull" # noqa
4
# Value sent as the "apiKey" query parameter by GingerIt.parse (stored as GingerIt.api_key).
# NOTE(review): this key is hard-coded in source -- confirm it is meant to be public.
+ API_KEY = "6ae0c3a0-afdc-4532-a810-82ded0054236"
6
+
7
+
8
class GingerIt(object):
    """Small client for the Ginger text-correction web service.

    ``parse`` sends a piece of text to the configured endpoint and returns
    the corrected text together with the list of applied corrections.
    """

    def __init__(self):
        # Endpoint and credentials come from the module-level constants.
        self.url = URL
        self.api_key = API_KEY
        self.api_version = "2.0"
        self.lang = "US"

    def parse(self, text, verify=True):
        """Send *text* to the service and return the processed result.

        Args:
            text: The text to be corrected.
            verify: Forwarded unchanged as the ``verify`` argument of the
                GET request.

        Returns:
            The dictionary built by ``_process_data`` (keys ``"text"``,
            ``"result"`` and ``"corrections"``).
        """
        # The scraper is a session object; using it as a context manager
        # closes it (and its pooled connections) once the request is done
        # instead of leaking one session per call.
        with cloudscraper.create_scraper() as session:
            response = session.get(
                self.url,
                params={
                    "lang": self.lang,
                    "apiKey": self.api_key,
                    "clientVersion": self.api_version,
                    "text": text,
                },
                verify=verify,
            )
            data = response.json()
        return self._process_data(text, data)

    @staticmethod
    def _change_char(original_text, from_position, to_position, change_with):
        """Replace ``original_text[from_position:to_position + 1]``.

        ``to_position`` is inclusive: the character at that index is removed
        as well, and ``change_with`` is inserted in place of the span.
        """
        return "{}{}{}".format(
            original_text[:from_position],
            change_with,
            original_text[to_position + 1:],
        )

    def _process_data(self, text, data):
        """Apply the first suggestion of every correction in *data* to *text*.

        Args:
            text: The original text that was sent to the service.
            data: Decoded response; must contain a ``"Corrections"`` list
                whose items have ``"From"``, ``"To"`` (inclusive offsets
                into *text*) and ``"Suggestions"``.

        Returns:
            ``{"text": <original>, "result": <corrected>, "corrections":
            [...]}``. Each correction entry has ``"start"``, ``"text"`` (the
            replaced span of the original), ``"correct"`` and
            ``"definition"``. Entries are listed in the order they are
            applied, i.e. the reverse of the order in ``data``.

        Raises:
            KeyError: If ``data`` has no ``"Corrections"`` key or a
                correction lacks one of the expected keys.
        """
        result = text
        corrections = []

        # Walk the corrections back to front: replacing a later span first
        # leaves the offsets of the spans still to be processed untouched,
        # even when the replacement has a different length.
        for suggestion in reversed(data["Corrections"]):
            start = suggestion["From"]
            end = suggestion["To"]

            # A correction without any suggestion cannot be applied.
            if not suggestion["Suggestions"]:
                continue

            suggest = suggestion["Suggestions"][0]
            result = self._change_char(result, start, end, suggest["Text"])

            corrections.append(
                {
                    "start": start,
                    "text": text[start:end + 1],
                    "correct": suggest.get("Text", None),
                    "definition": suggest.get("Definition", None),
                }
            )

        return {"text": text, "result": result, "corrections": corrections}
output_beautify.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from gingerit import GingerIt
3
+ import pysbd
4
+
5
+
6
# Module-level pysbd segmenter (language "en", clean=False); runGinger calls
# its segment() method to split the cleaned paragraph into sentences.
+ segmentor = pysbd.Segmenter(language="en", clean=False)
7
# Pattern runGinger passes to re.findall to break an over-long sentence into
# pieces: a run of characters other than ';', ':', newline or '•', then at
# most one delimiter and any trailing whitespace.
# NOTE(review): ',' appears only in the trailing delimiter class, so the
# leading run swallows commas and they never end a piece -- confirm intent.
+ subsegment_re = r'[^;:\n•]+[;,:\n•]?\s*'
8
+
9
+
10
def clean_text(text):
    """Replace markup noise in *text* with spaces and squeeze repeated spaces.

    Every ``*``, ``|``, ``#`` and newline, and every run of two or more
    ``-`` characters, is replaced by a single space; afterwards each run of
    spaces is collapsed into one space. A lone ``-`` is left untouched.

    Args:
        text: The raw input string.

    Returns:
        The cleaned string.
    """
    # Raw strings keep the regex escapes out of Python's own string-escape
    # handling (the non-raw "\*", "\|" and "\#" trigger invalid-escape
    # warnings). Every replacement is a space and no pattern matches a space,
    # so one combined pass gives the same result as sequential substitutions.
    spaced = re.sub(r"[*|#\n]|--+", " ", text)
    return re.sub(r" +", " ", spaced)
18
+
19
+
20
def runGinger(par):
    """Clean *par*, correct it sentence by sentence and return the result.

    The paragraph is passed through ``clean_text`` and split into sentences
    with the module-level ``segmentor``. Sentences shorter than 300
    characters are sent to ``GingerIt.parse`` as they are. Longer sentences
    are split with ``subsegment_re``; if every piece is shorter than 300
    characters the pieces are corrected one by one and re-joined, otherwise
    the sentence is reported on stdout and kept unchanged.

    Args:
        par: The paragraph to correct.

    Returns:
        The processed sentences joined with single spaces.
    """
    # NOTE(review): 300 presumably reflects a request-size limit of the
    # service -- confirm.
    max_len = 300

    par = clean_text(par)
    # One client is enough; it holds only configuration.
    parser = GingerIt()
    fixed = []
    for sentence in segmentor.segment(par):
        if len(sentence) < max_len:
            fixed.append(parser.parse(sentence)['result'])
            continue

        subsegments = re.findall(subsegment_re, sentence)
        # Give up when the sentence could not be split, or when a piece is
        # still too long to send -- the same limit applied to whole sentences.
        if len(subsegments) <= 1 or any(len(v) >= max_len for v in subsegments):
            print(f'Skipped: {sentence}')
            fixed.append(sentence)
            continue

        # Pieces keep their trailing delimiter/whitespace, so a plain
        # concatenation restores the sentence layout.
        fixed.append("".join(parser.parse(s)['result'] for s in subsegments))
    return " ".join(fixed)