Files changed (1) hide show
  1. README.md +32 -4
README.md CHANGED
@@ -70,14 +70,42 @@ pt
70
  from transformers import XLMRobertaTokenizerFast, XLMRobertaForSequenceClassification
71
 
72
  # load tokenizer and model weights
73
- tokenizer = XLMRobertaTokenizerFast.from_pretrained('SkolkovoInstitute/xlmr_formality_classifier')
74
- model = XLMRobertaForSequenceClassification.from_pretrained('SkolkovoInstitute/xlmr_formality_classifier')
 
 
 
 
 
 
 
 
75
 
76
  # prepare the input
77
- batch = tokenizer.encode('ты супер', return_tensors='pt')
 
 
 
 
 
 
 
78
 
79
  # inference
80
- model(batch)
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  ```
82
 
83
  ## Citation
 
70
  from transformers import XLMRobertaTokenizerFast, XLMRobertaForSequenceClassification
71
 
72
  # load tokenizer and model weights
73
+ tokenizer = XLMRobertaTokenizerFast.from_pretrained('s-nlp/xlmr_formality_classifier')
74
+ model = XLMRobertaForSequenceClassification.from_pretrained('s-nlp/xlmr_formality_classifier')
75
+
76
+ id2formality = {0: "formal", 1: "informal"}
77
+ texts = [
78
+ "I like you. I love you",
79
+ "Hey, what's up?",
80
+ "Siema, co porabiasz?",
81
+ "I feel deep regret and sadness about the situation in international politics.",
82
+ ]
83
 
84
  # prepare the input
85
+ encoding = tokenizer(
86
+ texts,
87
+ add_special_tokens=True,
88
+ return_token_type_ids=True,
89
+ truncation=True,
90
+ padding="max_length",
91
+ return_tensors="pt",
92
+ )
93
 
94
  # inference
95
+ output = model(**encoding)
96
+
97
+ formality_scores = [
98
+ {id2formality[idx]: score for idx, score in enumerate(text_scores.tolist())}
99
+ for text_scores in output.logits.softmax(dim=1)
100
+ ]
101
+ formality_scores
102
+ ```
103
+ Output:
104
+ ```
105
+ [{'formal': 0.993225634098053, 'informal': 0.006774314679205418},
106
+ {'formal': 0.8807966113090515, 'informal': 0.1192033663392067},
107
+ {'formal': 0.936184287071228, 'informal': 0.06381577253341675},
108
+ {'formal': 0.9986615180969238, 'informal': 0.0013385231141000986}]
109
  ```
110
 
111
  ## Citation