Goodsea mertcobanov committed on
Commit
a43c3a5
0 Parent(s):

Duplicate from mertcobanov/deprem-ocr-2

Browse files

Co-authored-by: Mert Cobanov <[email protected]>

Files changed (4) hide show
  1. .gitattributes +34 -0
  2. README.md +13 -0
  3. app.py +145 -0
  4. requirements.txt +5 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Deprem Ocr 2
3
+ emoji: 👀
4
+ colorFrom: green
5
+ colorTo: blue
6
+ sdk: gradio
7
+ sdk_version: 3.17.0
8
+ app_file: app.py
9
+ pinned: false
10
+ duplicated_from: mertcobanov/deprem-ocr-2
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from easyocr import Reader
3
+ from PIL import Image
4
+ import io
5
+ import json
6
+ import csv
7
+ import openai
8
+ import ast
9
+ import os
10
+ from deta import Deta
11
+
12
+
13
# The OpenAI key is taken from the API_KEY environment variable (None if unset).
openai.api_key = os.environ.get('API_KEY')

# One module-level EasyOCR reader, created with the "tr" language list and
# reused by every OCR call in this file.
reader = Reader(["tr"])
15
+
16
def get_parsed_address(input_img):
    """Run OCR on an image and parse the recognized text into address fields.

    The image is first turned into a single text string by ``get_text``; that
    string is then passed to ``openai_response`` and its result is returned
    unchanged.
    """
    return openai_response(get_text(input_img))
20
+
21
+
22
def get_text(input_img):
    """Return everything the OCR reader finds in ``input_img`` as one string.

    ``reader.readtext`` is called with ``detail=0`` and the items it returns
    are joined with single spaces.
    """
    fragments = reader.readtext(input_img, detail=0)
    separator = " "
    return separator.join(fragments)
25
+
26
+
27
def save_csv(mahalle, il, sokak, apartman):
    """Append one address row to ``adress_book.csv`` and return the row.

    Args:
        mahalle: First column of the row.
        il: Second column of the row.
        sokak: Third column of the row.
        apartman: Fourth column of the row.

    Returns:
        The list ``[mahalle, il, sokak, apartman]`` that was written.
    """
    adres_full = [mahalle, il, sokak, apartman]

    # newline="" hands line-ending control to the csv module. Without it the
    # writer's "\r\n" is translated again in text mode on Windows, producing
    # "\r\r\n" and therefore blank rows between records.
    with open("adress_book.csv", "a", encoding="utf-8", newline="") as f:
        csv.writer(f).writerow(adres_full)
    return adres_full
34
+
35
+
36
def get_json(mahalle, il, sokak, apartman):
    """Serialize the four address parts into a pretty-printed JSON object.

    The object has the keys "mahalle", "il", "sokak" and "apartman", in that
    order. It is indented by four spaces and non-ASCII characters are emitted
    as-is rather than escaped (``ensure_ascii=False``).
    """
    field_names = ("mahalle", "il", "sokak", "apartman")
    field_values = (mahalle, il, sokak, apartman)
    return json.dumps(
        dict(zip(field_names, field_values)),
        indent=4,
        ensure_ascii=False,
    )
40
+
41
def write_db(data_dict):
    """Insert ``data_dict`` as a record into the "deprem-ocr" Deta Base.

    The project key is read from the ``DETA_KEY`` environment variable on
    every call.
    """
    # Build the Deta client from the project key.
    client = Deta(os.getenv('DETA_KEY'))

    # Open the Base by name and store the record in it.
    client.Base("deprem-ocr").insert(data_dict)
49
+
50
+
51
def text_dict(input):
    """Parse a dict literal, store it, and return its address fields as strings.

    Args:
        input: Text containing a Python dict literal. The name shadows the
            builtin ``input``; it is kept so existing callers are unaffected.

    Returns:
        An 8-tuple of strings in the order city, distinct, neighbourhood,
        street, address, tel, name_surname, no. A key that is absent from the
        parsed dict yields ``''``. For empty or whitespace-only input the
        tuple is all empty strings and nothing is written to the database.

    Raises:
        ValueError, SyntaxError: If non-blank ``input`` is not a valid Python
            literal (raised by ``ast.literal_eval``).
    """
    field_order = (
        'city',
        'distinct',
        'neighbourhood',
        'street',
        'address',
        'tel',
        'name_surname',
        'no',
    )

    # A cleared textbox delivers an empty string; literal_eval("") would raise
    # SyntaxError, so return blank fields instead of failing.
    if not input or not input.strip():
        return ("",) * len(field_order)

    eval_result = ast.literal_eval(input)
    write_db(eval_result)

    return tuple(str(eval_result.get(key, "")) for key in field_order)
65
+
66
def openai_response(ocr_input):
    """Ask the OpenAI completion API to extract address fields from free text.

    Args:
        ocr_input: The raw text (OCR output or user-typed message) to parse.

    Returns:
        A dict parsed from the model's completion, with ``"input"`` set to
        ``ocr_input`` and each of city, distinct, neighbourhood, street, no,
        tel, name_surname, address guaranteed to be present (``''`` when the
        model did not supply it).

    Raises:
        ValueError, SyntaxError: If the completion text is not a valid Python
            literal.
    """
    # The continuation lines of the prompt start at column 0 on purpose: any
    # leading whitespace would become part of the text sent to the model.
    prompt = f"""Tabular Data Extraction You are a highly intelligent and accurate tabular data extractor from
plain text input and especially from emergency text that carries address information, your inputs can be text
of arbitrary size, but the output should be in [{{'tabular': {{'entity_type': 'entity'}} }}] JSON format Force it
to only extract keys that are shared as an example in the examples section, if a key value is not found in the
text input, then it should be ignored. Have only city, distinct, neighbourhood,
street, no, tel, name_surname, address Examples: Input: Deprem sırasında evimizde yer alan adresimiz: İstanbul,
Beşiktaş, Yıldız Mahallesi, Cumhuriyet Caddesi No: 35, cep telefonu numaram 5551231256, adim Ahmet Yilmaz
Output: {{'city': 'İstanbul', 'distinct': 'Beşiktaş', 'neighbourhood': 'Yıldız Mahallesi', 'street': 'Cumhuriyet Caddesi', 'no': '35', 'tel': '5551231256', 'name_surname': 'Ahmet Yılmaz', 'address': 'İstanbul, Beşiktaş, Yıldız Mahallesi, Cumhuriyet Caddesi No: 35'}}
Input: {ocr_input}
Output:
"""

    response = openai.Completion.create(
        model="text-davinci-003",
        prompt=prompt,
        temperature=0,
        max_tokens=300,
        top_p=1,
        frequency_penalty=0.0,
        presence_penalty=0.0,
        stop=["\n"],
    )
    resp = response["choices"][0]["text"]
    print(resp)

    # SECURITY: the completion is derived from untrusted OCR/user text, so it
    # must never be passed to eval() (this code previously did), because a
    # crafted message could make the model emit an executable expression.
    # ast.literal_eval accepts only literals. The text is stripped first
    # because older Python versions reject leading whitespace there.
    literal_text = resp.replace("'{", "{").replace("}'", "}").strip()
    resp = ast.literal_eval(literal_text)
    resp["input"] = ocr_input

    # Downstream code indexes every one of these keys, so fill in the ones
    # the model left out.
    dict_keys = (
        'city',
        'distinct',
        'neighbourhood',
        'street',
        'no',
        'tel',
        'name_surname',
        'address',
        'input',
    )
    for key in dict_keys:
        resp.setdefault(key, '')
    return resp
108
+
109
+
110
# NOTE(review): the layout nesting below was reconstructed from a flattened
# diff view in which indentation was lost; confirm it against the running app.
with gr.Blocks() as demo:
    gr.Markdown(
        """
    # Enkaz Bildirme Uygulaması
    """)
    gr.Markdown("Bu uygulamada ekran görüntüsü sürükleyip bırakarak AFAD'a enkaz bildirimi yapabilirsiniz. Mesajı metin olarak da girebilirsiniz, tam adresi ayrıştırıp döndürür. API olarak kullanmak isterseniz sayfanın en altında use via api'ya tıklayın.")

    # Inputs: a screenshot to OCR, or the message typed/pasted as text.
    with gr.Row():
        img_area = gr.Image(label="Ekran Görüntüsü yükleyin 👇")
        ocr_result = gr.Textbox(label="Metin yükleyin 👇 ")

    # Receives the parsed result; its change event fills the fields below.
    open_api_text = gr.Textbox(label="Tam Adres")

    # A Button's caption is its `value`; `label` does not set the visible text.
    submit_button = gr.Button("Yükle")

    # One textbox per extracted address field.
    with gr.Column():
        with gr.Row():
            city = gr.Textbox(label="İl")
            distinct = gr.Textbox(label="İlçe")
        with gr.Row():
            neighbourhood = gr.Textbox(label="Mahalle")
            street = gr.Textbox(label="Sokak/Cadde/Bulvar")
        with gr.Row():
            tel = gr.Textbox(label="Telefon")
        with gr.Row():
            name_surname = gr.Textbox(label="İsim Soyisim")
            address = gr.Textbox(label="Adres")
        with gr.Row():
            no = gr.Textbox(label="Kapı No")

    # Image path: OCR + parsing on button click, exposed as "upload_image".
    submit_button.click(get_parsed_address, inputs=img_area, outputs=open_api_text, api_name="upload_image")

    # Text path: parse whenever the text box changes, exposed as "upload-text".
    ocr_result.change(openai_response, ocr_result, open_api_text, api_name="upload-text")

    # Whenever a parsed result arrives, split it into the individual fields.
    open_api_text.change(text_dict, open_api_text, [city, distinct, neighbourhood, street, address, tel, name_surname, no])


if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ openai
2
+ Pillow
3
+ easyocr
4
+ gradio
5
+ deta