# File size: 6,249 Bytes
# 550665c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import os
from datasets import Sequence, Value, Features
from datasets import Dataset, DatasetDict

# Schema of a single raw example: string identifiers and text fields, a
# sequence of answers (each with its text and an int32 "answer_start"), and a
# boolean "is_impossible" flag.
EXAMPLE_FEATURES = Features(
    {
        "guid": Value("string"),
        "question": Value("string"),
        "context": Value("string"),
        # Every answer carries its text together with its start position.
        "answers": Sequence(
            feature={
                "text": Value("string"),
                "answer_start": Value("int32"),
            },
        ),
        "is_impossible": Value("bool"),
        # Free-form string metadata attached to the example.
        "title": Value("string"),
        "classtype": Value("string"),
        "source": Value("string"),
        "dataset": Value("string"),
    }
)

# Columns of a tokenized training row for the "sketch" stage: three per-token
# integer sequences plus one int64 label per row.
SKETCH_TRAIN_FEATURES = Features(
    {
        "input_ids": Sequence(feature=Value("int32")),
        "attention_mask": Sequence(feature=Value("int8")),
        "token_type_ids": Sequence(feature=Value("int8")),
        "labels": Value("int64"),
    }
)

# Columns of a tokenized evaluation row for the "sketch" stage. Same columns
# as SKETCH_TRAIN_FEATURES plus a string "example_id".
SKETCH_EVAL_FEATURES = Features(
    {
        "input_ids": Sequence(feature=Value("int32")),
        "attention_mask": Sequence(feature=Value("int8")),
        "token_type_ids": Sequence(feature=Value("int8")),
        "labels": Value("int64"),
        "example_id": Value("string"),
    }
)

# Columns of a tokenized training row for the "intensive" stage: three
# per-token integer sequences, int64 start/end positions, and a float64
# "is_impossibles" value per row.
INTENSIVE_TRAIN_FEATURES = Features(
    {
        "input_ids": Sequence(feature=Value(dtype='int32', id=None)),
        "attention_mask": Sequence(feature=Value(dtype='int8', id=None)),
        "token_type_ids": Sequence(feature=Value(dtype='int8', id=None)),
        "start_positions": Value(dtype='int64', id=None),
        "end_positions": Value(dtype='int64', id=None),
        "is_impossibles": Value(dtype='float64', id=None),
    }
)

# Backward-compatible alias: the constant was originally published under this
# misspelled name, so it is kept to avoid breaking existing imports.
INTENSIVE_TRAIN_FEATUERS = INTENSIVE_TRAIN_FEATURES

# Columns of a tokenized evaluation row for the "intensive" stage: three
# per-token integer sequences, a nested int64 "offset_mapping" sequence, and
# a string "example_id".
INTENSIVE_EVAL_FEATURES = Features(
    {
        "input_ids": Sequence(feature=Value(dtype='int32', id=None)),
        "attention_mask": Sequence(feature=Value(dtype='int8', id=None)),
        "token_type_ids": Sequence(feature=Value(dtype='int8', id=None)),
        # Sequence of int64 sequences (one inner sequence per token).
        "offset_mapping": Sequence(
            feature=Sequence(
                feature=Value(dtype='int64', id=None)
            )
        ),
        "example_id": Value(dtype='string', id=None),
    }
)

# Backward-compatible alias: the constant was originally published under this
# misspelled name, so it is kept to avoid breaking existing imports.
INTENSIVE_EVAL_FEATUERS = INTENSIVE_EVAL_FEATURES

# Column names for raw examples. Each value matches a key declared in
# EXAMPLE_FEATURES.
QUESTION_COLUMN_NAME: str = "question"
CONTEXT_COLUMN_NAME: str = "context"
ANSWER_COLUMN_NAME: str = "answers"
ANSWERABLE_COLUMN_NAME: str = "is_impossible"  # note: the flag marks *un*answerable rows
ID_COLUMN_NAME: str = "guid"

# Base names of JSON artifacts. Only the file names are fixed here; the
# directory they are written to or read from is decided by the callers.
SCORE_EXT_FILE_NAME: str = "cls_score.json"
INTENSIVE_PRED_FILE_NAME: str = "predictions.json"
NBEST_PRED_FILE_NAME: str = "nbest_predictions.json"
SCORE_DIFF_FILE_NAME: str = "null_odds.json"

# Absolute path of the default YAML configuration.
#
# The path is anchored at the directory that contains this module. Joining
# onto os.path.realpath(__file__) directly would treat the module file as a
# directory ("<module>.py/args/default_config.yaml"), which can never exist.
# NOTE(review): assumes the "args" directory sits next to this module; confirm
# against the package layout.
DEFAULT_CONFIG_FILE = os.path.join(
    os.path.dirname(os.path.realpath(__file__)), "args", "default_config.yaml"
)

# Korean help texts for the question and context inputs. The literals were
# stored as mojibake (UTF-8 bytes decoded through a Thai/Latin code page) and
# are restored to the intended Hangul here.
KO_QUERY_HELP_TEXT = "질문을 입력해주세요!"
KO_CONTEXT_HELP_TEXT = "문맥을 입력해주세요!"

# English help texts for the question and context inputs.
EN_QUERY_HELP_TEXT: str = "Plz enter your question!"
EN_CONTEXT_HELP_TEXT: str = "Plz enter your context!"

# Korean example question and context (a passage about Admiral Yi Sun-sin).
# The literals were stored as mojibake (UTF-8 bytes decoded through a
# Thai/Latin code page) and are restored to the intended Hangul here.
# Surrounding blank lines are removed by .strip(); the blank lines between
# paragraphs are part of the value.
KO_EXAMPLE_QUERY = "이순신은 어느 시대의 무신이야?"
KO_EXAMPLE_CONTEXTS = """

16세기 조선의 무신으로, 일본이 조선을 침공하여 일어난 전쟁인 임진왜란 당시 조선 수군을 통솔했던 제독이자 구국영웅이다.

            

침략군과 교전하여 천재적인 활약상을 펼치고 중앙 지원 없이 자급자족을 해낸 군 지휘관이자, 휘하 인사들에게 법에 따른 원칙을 요구하면서도 뚜렷한 성공률과 부족함 없는 처우를 보장한 상관, 지방관 시절 백성들에게 선정을 베풀고 전시에도 그들을 위무하고 구제한 목민관, 고위 관료와 접선 및 축재를 거부하고 공정과 국익, 절제를 중시한 인격자, 자신이 관할한 지역의 백성과 병사에게 각종 사업을 장려하여 많은 수효를 얻어낸 행정가, 그리고 왕을 위시한 조정의 핍박으로 사형수가 되거나 후임자의 실책으로 군사·군선들을 거의 상실하거나 어머니와 아들을 잃는 등 많은 수난을 겪고도 명량 해전 등에 임하며 굴하지 않은 철인의 면모까지 갖춰 조선 중기의 명장을 넘어 한국사 최고 위인의 반열까지 오른 인물이다.



생전부터 그를 사적으로 알고 있던 인근 백성이나 군졸, 일부 장수와 재상들로부터 뛰어난 인물로 평가받았고 그렇지 않더라도 명성이 제법 있었으며 전사 소식에 많은 이가 남녀노소를 불문하고 크게 슬퍼했다고 전해진다. 사후 조정은 관직을 추증했고 선비들은 찬양시(詩)를 지었으며 백성들은 추모비를 세우는 등, 이순신은 오래도록 많은 추앙을 받아왔다. 이는 일제강점기를 거쳐 현대에도 마찬가지로, 이순신은 대한민국 국민들이 가장 존경하는 위인 중 한 명으로 꼽히며 현대 한국에서 성웅이라는 최상급 수사가 이름 앞에 붙어도 어떤 이의도 제기받지 않는, 세종과 함께 한국인에게 가장 사랑받는 한국사 양대 위인이다. 가장 존경하는 위인을 묻는 설문조사에서도 세종대왕과 1, 2위를 다투며 충무공이라는 시호도 실제로는 김시민과 같은 여러 장수들이 받은 시호이지만 현대 한국인들은 이순신 전용 시호로 인식한다.

""".strip()

# English example question and context. The accented name and the IPA
# transcription were stored as mojibake and are restored here. Surrounding
# blank lines are removed by .strip().
EN_EXAMPLE_QUERY = "When did Beyonce start becoming popular?"
EN_EXAMPLE_CONTEXTS = """

Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ bee-YON-say) (born September 4, 1981) is an American singer, songwriter, record producer and actress. Born and raised in Houston, Texas, she performed in various singing and dancing competitions as a child, and rose to fame in the late 1990s as lead singer of R&B girl-group Destiny's Child. Managed by her father, Mathew Knowles, the group became one of the world's best-selling girl groups of all time. Their hiatus saw the release of Beyoncé's debut album, Dangerously in Love (2003), which established her as a solo artist worldwide, earned five Grammy Awards and featured the Billboard Hot 100 number-one singles "Crazy in Love" and "Baby Boy".

""".strip()