File size: 10,682 Bytes
a4208a2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
[
  {
    "text": "آشپزخانه کوچک من: February 2012",
    "decoded_text": "<unk>ا<unk> <unk> <unk>: February 2012",
    "diff": [
      "replace   text[0:5] --> decoded_text[0:5]  'آشپزخ' --> '<unk>'",
      "replace   text[6:8] --> decoded_text[6:11]     'نه' --> '<unk>'",
      "replace   text[9:13] --> decoded_text[12:17]   'کوچک' --> '<unk>'",
      "replace   text[14:16] --> decoded_text[18:23]     'من' --> '<unk>'"
    ],
    "n_oov_chars": 13,
    "oov_ratio": 0.41935483870967744,
    "oov_charset": "[\"آ\", \"ش\", \"پ\", \"ز\", \"خ\", \"ن\", \"ه\", \"ک\", \"و\", \"چ\", \"م\"]"
  },
  {
    "text": "آشپزخانه کوچک من",
    "decoded_text": "<unk>ا<unk> <unk> <unk>",
    "diff": [
      "replace   text[0:5] --> decoded_text[0:5]  'آشپزخ' --> '<unk>'",
      "replace   text[6:8] --> decoded_text[6:11]     'نه' --> '<unk>'",
      "replace   text[9:13] --> decoded_text[12:17]   'کوچک' --> '<unk>'",
      "replace   text[14:16] --> decoded_text[18:23]     'من' --> '<unk>'"
    ],
    "n_oov_chars": 13,
    "oov_ratio": 0.8125,
    "oov_charset": "[\"آ\", \"ش\", \"پ\", \"ز\", \"خ\", \"ن\", \"ه\", \"ک\", \"و\", \"چ\", \"م\"]"
  },
  {
    "text": "بکینگ پودر:2 قاشق چای خوری",
    "decoded_text": "<unk> <unk>:2 <unk>ا<unk> <unk>ا<unk> <unk>",
    "diff": [
      "replace   text[0:5] --> decoded_text[0:5]  'بکینگ' --> '<unk>'",
      "replace   text[6:10] --> decoded_text[6:11]   'پودر' --> '<unk>'",
      "replace   text[13:14] --> decoded_text[14:19]      'ق' --> '<unk>'",
      "replace   text[15:17] --> decoded_text[20:25]     'شق' --> '<unk>'",
      "replace   text[18:19] --> decoded_text[26:31]      'چ' --> '<unk>'",
      "replace   text[20:21] --> decoded_text[32:37]      'ی' --> '<unk>'",
      "replace   text[22:26] --> decoded_text[38:43]   'خوری' --> '<unk>'"
    ],
    "n_oov_chars": 18,
    "oov_ratio": 0.6923076923076923,
    "oov_charset": "[\"ب\", \"ک\", \"ی\", \"ن\", \"گ\", \"پ\", \"و\", \"د\", \"ر\", \"ق\", \"ش\", \"چ\", \"خ\"]"
  },
  {
    "text": "تخم مرغ:2 عدد بزرگ",
    "decoded_text": "<unk> <unk>:2 <unk> <unk>",
    "diff": [
      "replace   text[0:3] --> decoded_text[0:5]    'تخم' --> '<unk>'",
      "replace   text[4:7] --> decoded_text[6:11]    'مرغ' --> '<unk>'",
      "replace   text[10:13] --> decoded_text[14:19]    'عدد' --> '<unk>'",
      "replace   text[14:18] --> decoded_text[20:25]   'بزرگ' --> '<unk>'"
    ],
    "n_oov_chars": 13,
    "oov_ratio": 0.7222222222222222,
    "oov_charset": "[\"ت\", \"خ\", \"م\", \"ر\", \"غ\", \"ع\", \"د\", \"ب\", \"ز\", \"گ\"]"
  },
  {
    "text": "کره:225 گرم به دمای اتاق رسیده",
    "decoded_text": "<unk>:225 <unk> <unk> <unk>ا<unk> ا<unk>ا<unk> <unk>",
    "diff": [
      "replace   text[0:3] --> decoded_text[0:5]    'کره' --> '<unk>'",
      "replace   text[8:11] --> decoded_text[10:15]    'گرم' --> '<unk>'",
      "replace   text[12:14] --> decoded_text[16:21]     'به' --> '<unk>'",
      "replace   text[15:17] --> decoded_text[22:27]     'دم' --> '<unk>'",
      "replace   text[18:19] --> decoded_text[28:33]      'ی' --> '<unk>'",
      "replace   text[21:22] --> decoded_text[35:40]      'ت' --> '<unk>'",
      "replace   text[23:24] --> decoded_text[41:46]      'ق' --> '<unk>'",
      "replace   text[25:30] --> decoded_text[47:52]  'رسیده' --> '<unk>'"
    ],
    "n_oov_chars": 18,
    "oov_ratio": 0.6,
    "oov_charset": "[\"ک\", \"ر\", \"ه\", \"گ\", \"م\", \"ب\", \"د\", \"ی\", \"ت\", \"ق\", \"س\"]"
  },
  {
    "text": "شکر:1و1/2 پیمانه+ 3 قاشق غذا خوری",
    "decoded_text": "<unk>:1<unk>1/2 <unk>ا<unk>+ 3 <unk>ا<unk> <unk>ا <unk>",
    "diff": [
      "replace   text[0:3] --> decoded_text[0:5]    'شکر' --> '<unk>'",
      "replace   text[5:6] --> decoded_text[7:12]      'و' --> '<unk>'",
      "replace   text[10:13] --> decoded_text[16:21]    'پیم' --> '<unk>'",
      "replace   text[14:16] --> decoded_text[22:27]     'نه' --> '<unk>'",
      "replace   text[20:21] --> decoded_text[31:36]      'ق' --> '<unk>'",
      "replace   text[22:24] --> decoded_text[37:42]     'شق' --> '<unk>'",
      "replace   text[25:27] --> decoded_text[43:48]     'غذ' --> '<unk>'",
      "replace   text[29:33] --> decoded_text[50:55]   'خوری' --> '<unk>'"
    ],
    "n_oov_chars": 18,
    "oov_ratio": 0.5454545454545454,
    "oov_charset": "[\"ش\", \"ک\", \"ر\", \"و\", \"پ\", \"ی\", \"م\", \"ن\", \"ه\", \"ق\", \"غ\", \"ذ\", \"خ\"]"
  },
  {
    "text": "پودر دارچین:2 و1/2قاشق چای خوری",
    "decoded_text": "<unk> <unk>ا<unk>:2 <unk>1/2<unk>ا<unk> <unk>ا<unk> <unk>",
    "diff": [
      "replace   text[0:4] --> decoded_text[0:5]   'پودر' --> '<unk>'",
      "replace   text[5:6] --> decoded_text[6:11]      'د' --> '<unk>'",
      "replace   text[7:11] --> decoded_text[12:17]   'رچین' --> '<unk>'",
      "replace   text[14:15] --> decoded_text[20:25]      'و' --> '<unk>'",
      "replace   text[18:19] --> decoded_text[28:33]      'ق' --> '<unk>'",
      "replace   text[20:22] --> decoded_text[34:39]     'شق' --> '<unk>'",
      "replace   text[23:24] --> decoded_text[40:45]      'چ' --> '<unk>'",
      "replace   text[25:26] --> decoded_text[46:51]      'ی' --> '<unk>'",
      "replace   text[27:31] --> decoded_text[52:57]   'خوری' --> '<unk>'"
    ],
    "n_oov_chars": 19,
    "oov_ratio": 0.6129032258064516,
    "oov_charset": "[\"پ\", \"و\", \"د\", \"ر\", \"چ\", \"ی\", \"ن\", \"ق\", \"ش\", \"خ\"]"
  },
  {
    "text": "فر رو روی 350 درجه فارنهایت روشن کنید",
    "decoded_text": "<unk> <unk> <unk> 350 <unk> <unk>ا<unk>ا<unk> <unk> <unk>",
    "diff": [
      "replace   text[0:2] --> decoded_text[0:5]     'فر' --> '<unk>'",
      "replace   text[3:5] --> decoded_text[6:11]     'رو' --> '<unk>'",
      "replace   text[6:9] --> decoded_text[12:17]    'روی' --> '<unk>'",
      "replace   text[14:18] --> decoded_text[22:27]   'درجه' --> '<unk>'",
      "replace   text[19:20] --> decoded_text[28:33]      'ف' --> '<unk>'",
      "replace   text[21:24] --> decoded_text[34:39]    'رنه' --> '<unk>'",
      "replace   text[25:27] --> decoded_text[40:45]     'یت' --> '<unk>'",
      "replace   text[28:32] --> decoded_text[46:51]   'روشن' --> '<unk>'",
      "replace   text[33:37] --> decoded_text[52:57]   'کنید' --> '<unk>'"
    ],
    "n_oov_chars": 25,
    "oov_ratio": 0.6756756756756757,
    "oov_charset": "[\"ف\", \"ر\", \"و\", \"ی\", \"د\", \"ج\", \"ه\", \"ن\", \"ت\", \"ش\", \"ک\"]"
  },
  {
    "text": "کره رو با شکر هم بزنید تا یکدست و کرمی بشه تخم مرغها رو دونه دونه اضافه کنید و هم بزنید",
    "decoded_text": "<unk> <unk> <unk>ا <unk> <unk> <unk> <unk>ا <unk> <unk> <unk> <unk> <unk> <unk>ا <unk> <unk> <unk> ا<unk>ا<unk> <unk> <unk> <unk> <unk>",
    "diff": [
      "replace   text[0:3] --> decoded_text[0:5]    'کره' --> '<unk>'",
      "replace   text[4:6] --> decoded_text[6:11]     'رو' --> '<unk>'",
      "replace   text[7:8] --> decoded_text[12:17]      'ب' --> '<unk>'",
      "replace   text[10:13] --> decoded_text[19:24]    'شکر' --> '<unk>'",
      "replace   text[14:16] --> decoded_text[25:30]     'هم' --> '<unk>'",
      "replace   text[17:22] --> decoded_text[31:36]  'بزنید' --> '<unk>'",
      "replace   text[23:24] --> decoded_text[37:42]      'ت' --> '<unk>'",
      "replace   text[26:31] --> decoded_text[44:49]  'یکدست' --> '<unk>'",
      "replace   text[32:33] --> decoded_text[50:55]      'و' --> '<unk>'",
      "replace   text[34:38] --> decoded_text[56:61]   'کرمی' --> '<unk>'",
      "replace   text[39:42] --> decoded_text[62:67]    'بشه' --> '<unk>'",
      "replace   text[43:46] --> decoded_text[68:73]    'تخم' --> '<unk>'",
      "replace   text[47:51] --> decoded_text[74:79]   'مرغه' --> '<unk>'",
      "replace   text[53:55] --> decoded_text[81:86]     'رو' --> '<unk>'",
      "replace   text[56:60] --> decoded_text[87:92]   'دونه' --> '<unk>'",
      "replace   text[61:65] --> decoded_text[93:98]   'دونه' --> '<unk>'",
      "replace   text[67:68] --> decoded_text[100:105]      'ض' --> '<unk>'",
      "replace   text[69:71] --> decoded_text[106:111]     'فه' --> '<unk>'",
      "replace   text[72:76] --> decoded_text[112:117]   'کنید' --> '<unk>'",
      "replace   text[77:78] --> decoded_text[118:123]      'و' --> '<unk>'",
      "replace   text[79:81] --> decoded_text[124:129]     'هم' --> '<unk>'",
      "replace   text[82:87] --> decoded_text[130:135]  'بزنید' --> '<unk>'"
    ],
    "n_oov_chars": 62,
    "oov_ratio": 0.7126436781609196,
    "oov_charset": "[\"ک\", \"ر\", \"ه\", \"و\", \"ب\", \"ش\", \"م\", \"ز\", \"ن\", \"ی\", \"د\", \"ت\", \"س\", \"خ\", \"غ\", \"ض\", \"ف\"]"
  },
  {
    "text": "ارد و بکینگ پودر و نمک رو الک کنید و اضافه کنید",
    "decoded_text": "ا<unk> <unk> <unk> <unk> <unk> <unk> <unk> ال<unk> <unk> <unk> ا<unk>ا<unk> <unk>",
    "diff": [
      "replace   text[1:3] --> decoded_text[1:6]     'رد' --> '<unk>'",
      "replace   text[4:5] --> decoded_text[7:12]      'و' --> '<unk>'",
      "replace   text[6:11] --> decoded_text[13:18]  'بکینگ' --> '<unk>'",
      "replace   text[12:16] --> decoded_text[19:24]   'پودر' --> '<unk>'",
      "replace   text[17:18] --> decoded_text[25:30]      'و' --> '<unk>'",
      "replace   text[19:22] --> decoded_text[31:36]    'نمک' --> '<unk>'",
      "replace   text[23:25] --> decoded_text[37:42]     'رو' --> '<unk>'",
      "replace   text[28:29] --> decoded_text[45:50]      'ک' --> '<unk>'",
      "replace   text[30:34] --> decoded_text[51:56]   'کنید' --> '<unk>'",
      "replace   text[35:36] --> decoded_text[57:62]      'و' --> '<unk>'",
      "replace   text[38:39] --> decoded_text[64:69]      'ض' --> '<unk>'",
      "replace   text[40:42] --> decoded_text[70:75]     'فه' --> '<unk>'",
      "replace   text[43:47] --> decoded_text[76:81]   'کنید' --> '<unk>'"
    ],
    "n_oov_chars": 31,
    "oov_ratio": 0.6595744680851063,
    "oov_charset": "[\"ر\", \"د\", \"و\", \"ب\", \"ک\", \"ی\", \"ن\", \"گ\", \"پ\", \"م\", \"ض\", \"ف\", \"ه\"]"
  }
]