DepremOCR (PaddleOCR optimized inference pipeline)

#5
by Goodsea - opened
Files changed (8)
  1. .gitattributes +34 -0
  2. .gitignore +0 -162
  3. README.md +2 -2
  4. app.py +131 -102
  5. db_utils.py +0 -41
  6. openai_api.py +0 -31
  7. requirements.txt +0 -1
  8. utils.py +0 -53
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore DELETED
@@ -1,162 +0,0 @@
1
- # Byte-compiled / optimized / DLL files
2
- __pycache__/
3
- *.py[cod]
4
- *$py.class
5
-
6
- # C extensions
7
- *.so
8
-
9
- # Distribution / packaging
10
- .Python
11
- build/
12
- develop-eggs/
13
- dist/
14
- downloads/
15
- eggs/
16
- .eggs/
17
- lib/
18
- lib64/
19
- parts/
20
- sdist/
21
- var/
22
- wheels/
23
- share/python-wheels/
24
- *.egg-info/
25
- .installed.cfg
26
- *.egg
27
- MANIFEST
28
-
29
- # PyInstaller
30
- # Usually these files are written by a python script from a template
31
- # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
- *.manifest
33
- *.spec
34
-
35
- # Installer logs
36
- pip-log.txt
37
- pip-delete-this-directory.txt
38
-
39
- # Unit test / coverage reports
40
- htmlcov/
41
- .tox/
42
- .nox/
43
- .coverage
44
- .coverage.*
45
- .cache
46
- nosetests.xml
47
- coverage.xml
48
- *.cover
49
- *.py,cover
50
- .hypothesis/
51
- .pytest_cache/
52
- cover/
53
-
54
- # Translations
55
- *.mo
56
- *.pot
57
-
58
- # Django stuff:
59
- *.log
60
- local_settings.py
61
- db.sqlite3
62
- db.sqlite3-journal
63
-
64
- # Flask stuff:
65
- instance/
66
- .webassets-cache
67
-
68
- # Scrapy stuff:
69
- .scrapy
70
-
71
- # Sphinx documentation
72
- docs/_build/
73
-
74
- # PyBuilder
75
- .pybuilder/
76
- target/
77
-
78
- # Jupyter Notebook
79
- .ipynb_checkpoints
80
-
81
- # IPython
82
- profile_default/
83
- ipython_config.py
84
-
85
- # pyenv
86
- # For a library or package, you might want to ignore these files since the code is
87
- # intended to run in multiple environments; otherwise, check them in:
88
- # .python-version
89
-
90
- # pipenv
91
- # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
- # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
- # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
- # install all needed dependencies.
95
- #Pipfile.lock
96
-
97
- # poetry
98
- # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
- # This is especially recommended for binary packages to ensure reproducibility, and is more
100
- # commonly ignored for libraries.
101
- # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
- #poetry.lock
103
-
104
- # pdm
105
- # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
- #pdm.lock
107
- # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
- # in version control.
109
- # https://pdm.fming.dev/#use-with-ide
110
- .pdm.toml
111
-
112
- # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
- __pypackages__/
114
-
115
- # Celery stuff
116
- celerybeat-schedule
117
- celerybeat.pid
118
-
119
- # SageMath parsed files
120
- *.sage.py
121
-
122
- # Environments
123
- .env
124
- .venv
125
- env/
126
- venv/
127
- ENV/
128
- env.bak/
129
- venv.bak/
130
-
131
- # Spyder project settings
132
- .spyderproject
133
- .spyproject
134
-
135
- # Rope project settings
136
- .ropeproject
137
-
138
- # mkdocs documentation
139
- /site
140
-
141
- # mypy
142
- .mypy_cache/
143
- .dmypy.json
144
- dmypy.json
145
-
146
- # Pyre type checker
147
- .pyre/
148
-
149
- # pytype static type analyzer
150
- .pytype/
151
-
152
- # Cython debug symbols
153
- cython_debug/
154
-
155
- # PyCharm
156
- # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
- # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
- # and can be added to the global gitignore or merged into this file. For a more nuclear
159
- # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
- #.idea/
161
-
162
- .DS_Store
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md CHANGED
@@ -1,12 +1,12 @@
1
  ---
2
- title: Deprem OCR
3
  emoji: 👀
4
  colorFrom: green
5
  colorTo: blue
6
  sdk: gradio
7
  sdk_version: 3.17.0
8
  app_file: app.py
9
- pinned: true
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Deprem Ocr 2
3
  emoji: 👀
4
  colorFrom: green
5
  colorTo: blue
6
  sdk: gradio
7
  sdk_version: 3.17.0
8
  app_file: app.py
9
+ pinned: false
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -1,152 +1,181 @@
1
- from PIL import ImageFilter, Image
2
- from easyocr import Reader
3
  import gradio as gr
4
- import numpy as np
 
 
 
 
5
  import openai
6
  import ast
7
- from transformers import pipeline
8
  import os
 
9
 
10
- from openai_api import OpenAI_API
11
- import utils
12
 
13
- openai.api_key = os.getenv("API_KEY")
14
- reader = Reader(["tr"])
 
15
 
 
 
16
 
17
- def get_text(input_img):
18
- img = Image.fromarray(input_img)
19
- detailed = np.asarray(img.filter(ImageFilter.DETAIL))
20
- result = reader.readtext(detailed, detail=0, paragraph=True)
21
- return " ".join(result)
22
 
23
 
24
- # Submit button
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  def get_parsed_address(input_img):
26
 
27
  address_full_text = get_text(input_img)
28
- return ner_response(address_full_text)
29
 
30
 
31
- def save_deta_db(input):
32
- eval_result = ast.literal_eval(input)
33
- utils.write_db(eval_result)
34
- return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
 
37
- def update_component():
38
- return gr.update(value="Gönderildi, teşekkürler.", visible=True)
 
 
39
 
 
 
 
 
40
 
41
- def clear_textbox(value):
42
- return gr.update(value="")
 
43
 
44
 
45
  def text_dict(input):
46
  eval_result = ast.literal_eval(input)
 
 
47
  return (
48
- str(eval_result["il"]),
49
- str(eval_result["ilce"]),
50
- str(eval_result["mahalle"]),
51
- str(eval_result["sokak"]),
52
- str(eval_result["Apartman/site"]),
53
- str(eval_result["no"]),
54
- str(eval_result["ad-soyad"]),
55
- str(eval_result["dis kapi no"]),
56
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
-
59
- def ner_response(ocr_input):
60
-
61
- ner_pipe = pipeline("token-classification","deprem-ml/deprem-ner", aggregation_strategy="first")
62
- predictions = ner_pipe(ocr_input)
63
- resp = {}
64
-
65
- for item in predictions:
66
- print(item)
67
- key = item["entity_group"]
68
- resp[key] = item["word"]
69
-
70
  resp["input"] = ocr_input
71
- dict_keys = ["il", "ilce", "mahalle", "sokak", "Apartman/site", "no", "ad-soyad", "dis kapi no"]
 
 
 
 
 
 
 
 
 
 
72
  for key in dict_keys:
73
  if key not in resp.keys():
74
- resp[key] = ""
75
  return resp
76
 
77
 
78
- # User Interface
79
  with gr.Blocks() as demo:
80
  gr.Markdown(
81
- """
82
- # Enkaz Bildirme Uygulaması
83
  """
84
- )
85
- gr.Markdown(
86
- "Bu uygulamada ekran görüntüsü sürükleyip bırakarak AFAD'a enkaz bildirimi yapabilirsiniz. Mesajı metin olarak da girebilirsiniz, tam adresi ayrıştırıp döndürür. API olarak kullanmak isterseniz sayfanın en altında use via api'ya tıklayın."
87
- )
88
  with gr.Row():
89
- with gr.Column():
90
- img_area = gr.Image(label="Ekran Görüntüsü yükleyin 👇")
91
- img_area_button = gr.Button(value="Görüntüyü İşle", label="Submit")
92
-
93
- with gr.Column():
94
- text_area = gr.Textbox(label="Metin yükleyin 👇 ", lines=8)
95
- text_area_button = gr.Button(value="Metni Yükle", label="Submit")
96
-
97
  open_api_text = gr.Textbox(label="Tam Adres")
98
-
99
  with gr.Column():
100
  with gr.Row():
101
- il = gr.Textbox(label="İl", interactive=True, show_progress=False)
102
- ilce = gr.Textbox(label="İlçe", interactive=True, show_progress=False)
103
  with gr.Row():
104
- mahalle = gr.Textbox(
105
- label="Mahalle", interactive=True, show_progress=False
106
- )
107
- sokak = gr.Textbox(
108
- label="Sokak/Cadde/Bulvar", interactive=True, show_progress=False
109
- )
110
  with gr.Row():
111
- no = gr.Textbox(label="Telefon", interactive=True, show_progress=False)
112
  with gr.Row():
113
- ad_soyad = gr.Textbox(
114
- label="İsim Soyisim", interactive=True, show_progress=False
115
- )
116
- apartman = gr.Textbox(label="apartman", interactive=True, show_progress=False)
117
  with gr.Row():
118
- dis_kapi_no = gr.Textbox(label="Kapı No", interactive=True, show_progress=False)
119
 
120
- img_area_button.click(
121
- get_parsed_address,
122
- inputs=img_area,
123
- outputs=open_api_text,
124
- api_name="upload-image",
125
- )
126
 
127
- text_area_button.click(
128
- ner_response, text_area, open_api_text, api_name="upload-text"
129
- )
130
 
 
131
 
132
- open_api_text.change(
133
- text_dict,
134
- open_api_text,
135
- [il, ilce, mahalle, sokak, no, apartman, ad_soyad, dis_kapi_no],
136
- )
137
- ocr_button = gr.Button(value="Sadece OCR kullan")
138
- ocr_button.click(
139
- get_text,
140
- inputs=img_area,
141
- outputs=text_area,
142
- api_name="get-ocr-output",
143
- )
144
- submit_button = gr.Button(value="Veriyi Birimlere Yolla")
145
- submit_button.click(save_deta_db, open_api_text)
146
- done_text = gr.Textbox(label="Done", value="Not Done", visible=False)
147
- submit_button.click(update_component, outputs=done_text)
148
- for txt in [il, ilce, mahalle, sokak, apartman, no, ad_soyad, dis_kapi_no]:
149
- submit_button.click(fn=clear_textbox, inputs=txt, outputs=txt)
150
 
151
 
152
  if __name__ == "__main__":
 
 
 
1
  import gradio as gr
2
+ from easyocr import Reader
3
+ from PIL import Image
4
+ import io
5
+ import json
6
+ import csv
7
  import openai
8
  import ast
 
9
  import os
10
+ from deta import Deta
11
 
 
 
12
 
13
+ ######################
14
+ import requests
15
+ import json
16
 
17
+ import os
18
+ import openai
19
 
 
 
 
 
 
20
 
21
 
22
+ class OpenAI_API:
23
+ def __init__(self):
24
+ self.openai_api_key = ''
25
+
26
+ def single_request(self, address_text):
27
+
28
+ openai.api_type = "azure"
29
+ openai.api_base = "https://damlaopenai.openai.azure.com/"
30
+ openai.api_version = "2022-12-01"
31
+ openai.api_key = os.getenv("API_KEY")
32
+
33
+ response = openai.Completion.create(
34
+ engine="Davinci-003",
35
+ prompt=address_text,
36
+ temperature=0.9,
37
+ max_tokens=256,
38
+ top_p=1.0,
39
+ n=1,
40
+ logprobs=0,
41
+ echo=False,
42
+ stop=None,
43
+ frequency_penalty=0,
44
+ presence_penalty=0,
45
+ best_of=1)
46
+
47
+ return response
48
+
49
+ ########################
50
+
51
+ openai.api_key = os.getenv('API_KEY')
52
+ reader = Reader(["tr"])
53
+
54
+
55
  def get_parsed_address(input_img):
56
 
57
  address_full_text = get_text(input_img)
58
+ return openai_response(address_full_text)
59
 
60
 
61
+ def preprocess_img(inp_image):
62
+ gray = cv2.cvtColor(inp_image, cv2.COLOR_BGR2GRAY)
63
+ gray_img = cv2.bitwise_not(gray)
64
+ return gray_img
65
+
66
+
67
+ def get_text(input_img):
68
+ result = reader.readtext(input_img, detail=0)
69
+ return " ".join(result)
70
+
71
+
72
+ def save_csv(mahalle, il, sokak, apartman):
73
+ adres_full = [mahalle, il, sokak, apartman]
74
+
75
+ with open("adress_book.csv", "a", encoding="utf-8") as f:
76
+ write = csv.writer(f)
77
+ write.writerow(adres_full)
78
+ return adres_full
79
 
80
 
81
+ def get_json(mahalle, il, sokak, apartman):
82
+ adres = {"mahalle": mahalle, "il": il, "sokak": sokak, "apartman": apartman}
83
+ dump = json.dumps(adres, indent=4, ensure_ascii=False)
84
+ return dump
85
 
86
+ def write_db(data_dict):
87
+ # 2) initialize with a project key
88
+ deta_key = os.getenv('DETA_KEY')
89
+ deta = Deta(deta_key)
90
 
91
+ # 3) create and use as many DBs as you want!
92
+ users = deta.Base("deprem-ocr")
93
+ users.insert(data_dict)
94
 
95
 
96
  def text_dict(input):
97
  eval_result = ast.literal_eval(input)
98
+ write_db(eval_result)
99
+
100
  return (
101
+ str(eval_result['city']),
102
+ str(eval_result['distinct']),
103
+ str(eval_result['neighbourhood']),
104
+ str(eval_result['street']),
105
+ str(eval_result['address']),
106
+ str(eval_result['tel']),
107
+ str(eval_result['name_surname']),
108
+ str(eval_result['no']),
109
  )
110
+
111
+ def openai_response(ocr_input):
112
+ prompt = f"""Tabular Data Extraction You are a highly intelligent and accurate tabular data extractor from
113
+ plain text input and especially from emergency text that carries address information, your inputs can be text
114
+ of arbitrary size, but the output should be in [{{'tabular': {{'entity_type': 'entity'}} }}] JSON format Force it
115
+ to only extract keys that are shared as an example in the examples section, if a key value is not found in the
116
+ text input, then it should be ignored. Have only city, distinct, neighbourhood,
117
+ street, no, tel, name_surname, address Examples: Input: Deprem sırasında evimizde yer alan adresimiz: İstanbul,
118
+ Beşiktaş, Yıldız Mahallesi, Cumhuriyet Caddesi No: 35, cep telefonu numaram 5551231256, adim Ahmet Yilmaz
119
+ Output: {{'city': 'İstanbul', 'distinct': 'Beşiktaş', 'neighbourhood': 'Yıldız Mahallesi', 'street': 'Cumhuriyet Caddesi', 'no': '35', 'tel': '5551231256', 'name_surname': 'Ahmet Yılmaz', 'address': 'İstanbul, Beşiktaş, Yıldız Mahallesi, Cumhuriyet Caddesi No: 35'}}
120
+ Input: {ocr_input}
121
+ Output:
122
+ """
123
 
124
+ openai_client = OpenAI_API()
125
+ response = openai_client.single_request(ocr_input)
126
+ resp = response["choices"][0]["text"]
127
+ print(resp)
128
+ resp = eval(resp.replace("'{", "{").replace("}'", "}"))
 
 
 
 
 
 
 
129
  resp["input"] = ocr_input
130
+ dict_keys = [
131
+ 'city',
132
+ 'distinct',
133
+ 'neighbourhood',
134
+ 'street',
135
+ 'no',
136
+ 'tel',
137
+ 'name_surname',
138
+ 'address',
139
+ 'input',
140
+ ]
141
  for key in dict_keys:
142
  if key not in resp.keys():
143
+ resp[key] = ''
144
  return resp
145
 
146
 
 
147
  with gr.Blocks() as demo:
148
  gr.Markdown(
 
 
149
  """
150
+ # Enkaz Bildirme Uygulaması
151
+ """)
152
+ gr.Markdown("Bu uygulamada ekran görüntüsü sürükleyip bırakarak AFAD'a enkaz bildirimi yapabilirsiniz. Mesajı metin olarak da girebilirsiniz, tam adresi ayrıştırıp döndürür. API olarak kullanmak isterseniz sayfanın en altında use via api'ya tıklayın.")
 
153
  with gr.Row():
154
+ img_area = gr.Image(label="Ekran Görüntüsü yükleyin 👇")
155
+ ocr_result = gr.Textbox(label="Metin yükleyin 👇 ")
 
 
 
 
 
 
156
  open_api_text = gr.Textbox(label="Tam Adres")
157
+ submit_button = gr.Button(label="Yükle")
158
  with gr.Column():
159
  with gr.Row():
160
+ city = gr.Textbox(label="İl")
161
+ distinct = gr.Textbox(label="İlçe")
162
  with gr.Row():
163
+ neighbourhood = gr.Textbox(label="Mahalle")
164
+ street = gr.Textbox(label="Sokak/Cadde/Bulvar")
 
 
 
 
165
  with gr.Row():
166
+ tel = gr.Textbox(label="Telefon")
167
  with gr.Row():
168
+ name_surname = gr.Textbox(label="İsim Soyisim")
169
+ address = gr.Textbox(label="Adres")
 
 
170
  with gr.Row():
171
+ no = gr.Textbox(label="Kapı No")
172
 
 
 
 
 
 
 
173
 
174
+ submit_button.click(get_parsed_address, inputs = img_area, outputs = open_api_text, api_name="upload_image")
 
 
175
 
176
+ ocr_result.change(openai_response, ocr_result, open_api_text, api_name="upload-text")
177
 
178
+ open_api_text.change(text_dict, open_api_text, [city, distinct, neighbourhood, street, address, tel, name_surname, no])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
 
180
 
181
  if __name__ == "__main__":
db_utils.py DELETED
@@ -1,41 +0,0 @@
1
- from deta import Deta # Import Deta
2
- from pprint import pprint
3
- import os
4
-
5
- deta_key = os.getenv("DETA_KEY")
6
- deta = Deta(deta_key)
7
- db = deta.Base("deprem-ocr")
8
-
9
-
10
- def get_users_by_city(city_name, limit=10):
11
-
12
- user = db.fetch({"city": city_name.capitalize()}, limit=limit).items
13
- return user
14
-
15
-
16
- def get_all():
17
- res = db.fetch()
18
- all_items = res.items
19
-
20
- # fetch until last is 'None'
21
- while res.last:
22
- res = db.fetch(last=res.last)
23
- all_items += res.items
24
- return all_items
25
-
26
-
27
- def write_db(data_dict):
28
- # 2) initialize with a project key
29
- deta_key = os.getenv("DETA_KEY")
30
- deta = Deta(deta_key)
31
-
32
- # 3) create and use as many DBs as you want!
33
- users = deta.Base("deprem-ocr")
34
- users.insert(data_dict)
35
- print("Pushed to db")
36
-
37
-
38
- def get_latest_row(last):
39
- all_items = get_all()
40
- latest_items = all_items[-last:]
41
- return latest_items
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
openai_api.py DELETED
@@ -1,31 +0,0 @@
1
- import openai
2
- import os
3
-
4
-
5
- class OpenAI_API:
6
- def __init__(self):
7
- self.openai_api_key = ""
8
-
9
- def single_request(self, address_text):
10
-
11
- openai.api_type = "azure"
12
- openai.api_base = "https://afet-org.openai.azure.com/"
13
- openai.api_version = "2022-12-01"
14
- openai.api_key = os.getenv("API_KEY")
15
-
16
- response = openai.Completion.create(
17
- engine="afet-org",
18
- prompt=address_text,
19
- temperature=0.0,
20
- max_tokens=500,
21
- top_p=1,
22
- # n=1,
23
- # logprobs=0,
24
- # echo=False,
25
- stop=["\n"],
26
- frequency_penalty=0,
27
- presence_penalty=0,
28
- # best_of=1,
29
- )
30
-
31
- return response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -3,4 +3,3 @@ Pillow
3
  easyocr
4
  gradio
5
  deta
6
- transformers
 
3
  easyocr
4
  gradio
5
  deta
 
utils.py DELETED
@@ -1,53 +0,0 @@
1
- import cv2
2
- import csv
3
- import json
4
- from deta import Deta
5
- import os
6
- import requests
7
-
8
-
9
- def preprocess_img(inp_image):
10
- gray = cv2.cvtColor(inp_image, cv2.COLOR_BGR2GRAY)
11
- gray_img = cv2.bitwise_not(gray)
12
- return gray_img
13
-
14
-
15
- def save_csv(mahalle, il, sokak, apartman):
16
- adres_full = [mahalle, il, sokak, apartman]
17
-
18
- with open("adress_book.csv", "a", encoding="utf-8") as f:
19
- write = csv.writer(f)
20
- write.writerow(adres_full)
21
- return adres_full
22
-
23
-
24
- def get_json(mahalle, il, sokak, apartman):
25
- adres = {"mahalle": mahalle, "il": il, "sokak": sokak, "apartman": apartman}
26
- dump = json.dumps(adres, indent=4, ensure_ascii=False)
27
- return dump
28
-
29
-
30
- def write_db(data_dict):
31
- # 2) initialize with a project key
32
- deta_key = os.getenv("DETA_KEY")
33
- deta = Deta(deta_key)
34
-
35
- # 3) create and use as many DBs as you want!
36
- users = deta.Base("deprem-ocr")
37
- users.insert(data_dict)
38
-
39
-
40
- def ner_response(ocr_input):
41
- API_URL = "https://api-inference.huggingface.co/models/deprem-ml/deprem-ner"
42
- headers = {"Authorization": "Bearer xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}
43
-
44
- def query(payload):
45
- response = requests.post(API_URL, headers=headers, json=payload)
46
- return response.json()
47
-
48
- output = query(
49
- {
50
- "inputs": ocr_input,
51
- }
52
- )
53
- return output