Kithogue committed
Commit 32453ac • 1 Parent(s): bde5804

Add dependencies

Files changed (2)
  1. app.py +10 -1
  2. constraints.py +68 -0
app.py CHANGED
@@ -5,13 +5,22 @@ import pandas as pd
  import torch
  import tqdm
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
- from baseline_BERT import id2label
  import gradio as gr
+ import constraints

  model_ckpt = "Kithogue/2-lvl-events-multilingual"
  tokenizer = AutoTokenizer.from_pretrained(model_ckpt)


+ def get_labels2id(is_two_layer):
+     """sorted to preserve the order"""
+     labels = constraints.get_all_labels(is_two_layer)
+     return {label: i for i, label in enumerate(labels)}
+
+
+ id2label = {v: k for k, v in get_labels2id(True).items()}
+
+
  def get_inference(sample):
      model_hf = AutoModelForSequenceClassification.from_pretrained(model_ckpt)
      encoding = tokenizer(sample, return_tensors="pt")
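
For context, the added get_labels2id/id2label pair gives app.py a stable index-to-label mapping without importing from baseline_BERT. Below is a minimal sketch of how that mapping would typically be used to turn a prediction into a label string; the softmax/argmax step is an assumption and not part of this diff, while model_ckpt, tokenizer, and id2label are the names defined above, and predict_label is a hypothetical helper.

import torch
from transformers import AutoModelForSequenceClassification

def predict_label(sample):
    # Load the fine-tuned classifier and encode the input text (mirrors get_inference above).
    model_hf = AutoModelForSequenceClassification.from_pretrained(model_ckpt)
    encoding = tokenizer(sample, return_tensors="pt")
    # Forward pass without gradients, then pick the highest-scoring class index.
    with torch.no_grad():
        logits = model_hf(**encoding).logits
    predicted_id = int(logits.argmax(dim=-1).item())
    # Map the class index back to a human-readable label via id2label.
    return id2label[predicted_id]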
constraints.py ADDED
@@ -0,0 +1,68 @@
+ """A module to produce a 3-dimensional tensor of size [I, J, K], where I, J, K are the numbers of classes on each level.
+ If there is a path from i to j to k, the entry is 1, otherwise 0.
+ Based on the lowest level of the taxonomy."""
+
+ import jsonlines
+ import numpy as np
+ import torch
+
+ if torch.cuda.is_available():
+     device = 'cuda'
+ else:
+     device = 'cpu'
+
+
+ def get_layered_labels(is_two_layer: bool = False):
+     path_to_train = '/home/kisa/events/event-detection-hierarchical/data/RAMS/flat/train.jsonl'
+     with jsonlines.open(path_to_train, 'r') as f:
+         all_labels = []
+         for ff in f:
+             all_labels.append(ff['events']['type'])
+     all_labels = list(set(all_labels))
+     if is_two_layer:
+         return [get_two_layers(target) for target in all_labels]
+     else:
+         return [target.split('.') for target in all_labels]
+
+
+ def get_two_layers(target):
+     return target.split('.')[:2]
+
+
+ def get_all_labels(is_two_layer):
+     layered_labels = get_layered_labels(is_two_layer)
+     all_labels = [trgt for target in layered_labels for trgt in target]
+     labels = sorted(list(set(all_labels)))
+     if 'n/a' in labels:
+         labels.remove('n/a')
+     return labels
+
+
+ def get_labels2id(is_two_layer):
+     """sorted to preserve the order"""
+     labels = get_all_labels(is_two_layer)
+     return {label: i for i, label in enumerate(labels)}
+
+
+ def get_all_paths(is_two_layer):
+     labels2id = get_labels2id(is_two_layer)
+     layered_labels = get_layered_labels(is_two_layer)
+     layered_labels_pos = []
+     # Filtering out the n/a class.
+     # We assume that texts with such a label have only the 2 coarser layers as a valid label path,
+     # which is already included in the valid path constraint.
+     for target in layered_labels:
+         target_pos = []
+         for target_word in target:
+             if target_word == 'n/a':
+                 continue
+             else:
+                 target_pos.append(labels2id[target_word])
+         if target_pos not in layered_labels_pos:
+             layered_labels_pos.append(target_pos)
+     array_dim = len(labels2id)
+     path_matrix = np.zeros((array_dim, array_dim, array_dim))
+     for path in layered_labels_pos:
+         if len(path) > 2:
+             path_matrix[path[0], path[1], path[2]] = 1
+     return torch.Tensor(path_matrix).to(device)
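
To make the constraint tensor concrete: get_all_paths returns a cube of size [K, K, K] over the sorted label set (K = len(labels2id)), with a 1 wherever a coarse → mid → fine label triple occurs in the RAMS training data. Below is a minimal usage sketch, assuming the train.jsonl path hardcoded in get_layered_labels is reachable; the is_valid_path and mask_fine_scores helpers are illustrative and not part of this commit.

import torch
import constraints

# Build the valid-path cube over the full three-level label set.
path_tensor = constraints.get_all_paths(is_two_layer=False)

def is_valid_path(coarse_id, mid_id, fine_id):
    # 1.0 means the coarse -> mid -> fine triple was observed as a valid taxonomy path.
    return bool(path_tensor[coarse_id, mid_id, fine_id].item())

def mask_fine_scores(fine_scores, coarse_id, mid_id):
    # Suppress fine-level scores that cannot follow the chosen coarse/mid labels.
    valid = (path_tensor[coarse_id, mid_id] > 0).to(fine_scores.device)
    return fine_scores.masked_fill(~valid, float('-inf'))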