imryanxu committed on
Commit
190c1c2
·
verified ·
1 Parent(s): 210d151

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +14 -22
README.md CHANGED
@@ -12,33 +12,25 @@ This is a BERT model fine-tuned on a high-quality Chinese financial dataset. It
12
  ## Quickstart
13
  Here is an example code snippet for generating security risk scores using this model.
14
  ```python
15
- import torch
16
- from datasets import load_dataset
17
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
18
 
19
- model_name = "risk-model-zh-v0.1"
20
- dataset_file = "your_dataset.jsonl"
21
- text_column = "text"
22
- output_file = "your_output.jsonl"
23
 
24
- tokenizer = AutoTokenizer.from_pretrained(model_name)
25
- model = AutoModelForSequenceClassification.from_pretrained(model_name, torch_dtype=torch.bfloat16)
26
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
27
- model.to(device)
28
 
29
- dataset = load_dataset('json', data_files=dataset_file, cache_dir="cache/", split='train', num_proc=12)
 
 
30
 
 
31
 
32
- def compute_scores(batch):
33
- inputs = tokenizer(batch[text_column], return_tensors="pt", padding="longest", truncation=True).to(device)
34
- with torch.no_grad():
35
- outputs = model(**inputs)
36
- logits = outputs.logits.squeeze(-1).float().cpu().numpy()
37
 
38
- batch["risk_score"] = logits.tolist()
39
- return batch
40
-
41
-
42
- dataset = dataset.map(compute_scores, batched=True, batch_size=512)
43
- dataset.to_json(output_file)
44
  ```
 
12
  ## Quickstart
13
  Here is an example code snippet for generating security risk scores using this model.
14
  ```python
 
 
15
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
16
 
17
+ text = "你是一个聪明的机器人"
18
+ risk_model_name = "risk-model-zh-v0.1"
 
 
19
 
20
+ risk_tokenizer = AutoTokenizer.from_pretrained(risk_model_name)
21
+ risk_model = AutoModelForSequenceClassification.from_pretrained(risk_model_name)
 
 
22
 
23
+ risk_inputs = risk_tokenizer(text, return_tensors="pt", padding="longest", truncation=True)
24
+ risk_outputs = risk_model(**risk_inputs)
25
+ risk_logits = risk_outputs.logits.squeeze(-1).float().detach().numpy()
26
 
27
+ risk_score = risk_logits.item()
28
 
29
+ result = {
30
+ "text": text,
31
+ "risk_score": risk_score
32
+ }
 
33
 
34
+ print(result)
35
+ # {'text': '你是一个聪明的机器人', 'risk_score': 0.11226219683885574}
 
 
 
 
36
  ```