real-jiakai committed
Commit a8005a7 · verified · 1 parent: 20f8e26

Update README.md
Files changed (1): README.md (+17 -7)
README.md CHANGED
@@ -71,6 +71,7 @@ Evaluation results on the test set:
 ## Usage
 
 ```python
+import torch
 from transformers import AutoModelForQuestionAnswering, AutoTokenizer
 
 # Load model and tokenizer
@@ -78,8 +79,8 @@ model = AutoModelForQuestionAnswering.from_pretrained("real-jiakai/bert-base-chinese-finetuned-cmrc2018")
 tokenizer = AutoTokenizer.from_pretrained("real-jiakai/bert-base-chinese-finetuned-cmrc2018")
 
 # Prepare inputs
-question = "Your question in Chinese"
-context = "Context text in Chinese"
+question = "长城有多长?"
+context = "长城是中国古代的伟大建筑工程,全长超过2万公里,横跨中国北部多个省份。"
 
 # Tokenize inputs
 inputs = tokenizer(
@@ -87,14 +88,24 @@ inputs = tokenizer(
     context,
     return_tensors="pt",
     max_length=384,
-    truncation=True,
-    return_offsets_mapping=True
+    truncation=True
+)
+
+# Tokenize inputs
+inputs = tokenizer(
+    question,
+    context,
+    return_tensors="pt",
+    max_length=384,
+    truncation=True
 )
 
 # Get answer
 outputs = model(**inputs)
-start_logits = outputs.start_logits
-end_logits = outputs.end_logits
+answer_start = torch.argmax(outputs.start_logits)
+answer_end = torch.argmax(outputs.end_logits) + 1
+answer = tokenizer.decode(inputs["input_ids"][0][answer_start:answer_end])
+print("Answer:", answer)
 ```
 
 ## Citation
@@ -121,5 +132,4 @@ If you use this model, please cite the CMRC2018 dataset:
 doi = "10.18653/v1/D19-1600",
 pages = "5886--5891",
 }
-
 ```
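
For reference, here is the post-commit "Usage" snippet assembled from the hunks above. The committed version tokenizes the (question, context) pair twice with identical arguments, so the second call simply overwrites the first; the consolidated sketch below keeps a single call. The `torch.no_grad()` guard and the English glosses in the comments are editorial additions, not part of the commit.

```python
import torch
from transformers import AutoModelForQuestionAnswering, AutoTokenizer

# Load model and tokenizer
model = AutoModelForQuestionAnswering.from_pretrained("real-jiakai/bert-base-chinese-finetuned-cmrc2018")
tokenizer = AutoTokenizer.from_pretrained("real-jiakai/bert-base-chinese-finetuned-cmrc2018")

# Prepare inputs
question = "长城有多长?"  # "How long is the Great Wall?"
context = "长城是中国古代的伟大建筑工程,全长超过2万公里,横跨中国北部多个省份。"
# "The Great Wall is a great construction project of ancient China, more than
# 20,000 km long in total, spanning several provinces in northern China."

# Tokenize the (question, context) pair once
inputs = tokenizer(
    question,
    context,
    return_tensors="pt",
    max_length=384,
    truncation=True
)

# Get answer: run inference without tracking gradients
with torch.no_grad():
    outputs = model(**inputs)

# The predicted span runs from the argmax of the start logits to the
# argmax of the end logits; the slice end is exclusive, hence the +1
answer_start = torch.argmax(outputs.start_logits)
answer_end = torch.argmax(outputs.end_logits) + 1
answer = tokenizer.decode(inputs["input_ids"][0][answer_start:answer_end])
print("Answer:", answer)
```

Taking the two argmaxes independently is the simplest decoding strategy; it can yield an empty string when the predicted end index falls before the start index, so production code typically searches for the highest-scoring valid (start, end) pair instead.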