Add dependencies

Files changed:
- app.py          +10 -1
- constraints.py  +68 -0
app.py
CHANGED
@@ -5,13 +5,22 @@ import pandas as pd
 import torch
 import tqdm
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
-from baseline_BERT import id2label
 import gradio as gr
+import constraints
 
 model_ckpt = "Kithogue/2-lvl-events-multilingual"
 tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
 
 
+def get_labels2id(is_two_layer):
+    """sorted to preserve the order"""
+    labels = constraints.get_all_labels(is_two_layer)
+    return {label: i for i, label in enumerate(labels)}
+
+
+id2label = {v: k for k, v in get_labels2id(True).items()}
+
+
 def get_inference(sample):
     model_hf = AutoModelForSequenceClassification.from_pretrained(model_ckpt)
     encoding = tokenizer(sample, return_tensors="pt")
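With this change, app.py stops importing id2label from baseline_BERT and instead derives it from the new constraints module. A minimal sketch of the resulting mappings, using a hypothetical label set rather than the real RAMS event types returned by constraints.get_all_labels(True):

# Hypothetical labels for illustration; the real ones are read from the RAMS training data.
labels = sorted({"conflict", "attack", "contact", "meet"})
labels2id = {label: i for i, label in enumerate(labels)}
# labels2id -> {'attack': 0, 'conflict': 1, 'contact': 2, 'meet': 3}
id2label = {v: k for k, v in labels2id.items()}
# id2label  -> {0: 'attack', 1: 'conflict', 2: 'contact', 3: 'meet'}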
constraints.py
ADDED
@@ -0,0 +1,68 @@
+"""A module to produce a 3-dimensional tensor of size [I, J, K], where I, J, K are the numbers of classes on each level.
+If there is a path from i to j to k, the entry is 1, otherwise 0.
+Based on the lowest level of the taxonomy."""
+
+import jsonlines
+import numpy as np
+import torch
+
+if torch.cuda.is_available():
+    device = 'cuda'
+else:
+    device = 'cpu'  # fall back to CPU when no GPU is available
+
+
+def get_layered_labels(is_two_layer: bool = False):
+    path_to_train = '/home/kisa/events/event-detection-hierarchical/data/RAMS/flat/train.jsonl'
+    with jsonlines.open(path_to_train, 'r') as f:
+        all_labels = []
+        for ff in f:
+            all_labels.append(ff['events']['type'])
+    all_labels = list(set(all_labels))
+    if is_two_layer:
+        return [get_two_layers(target) for target in all_labels]
+    else:
+        return [target.split('.') for target in all_labels]
+
+
+def get_two_layers(target):
+    return target.split('.')[:2]
+
+
+def get_all_labels(is_two_layer):
+    layered_labels = get_layered_labels(is_two_layer)
+    all_labels = [trgt for target in layered_labels for trgt in target]
+    labels = sorted(list(set(all_labels)))
+    if 'n/a' in labels:
+        labels.remove('n/a')
+    return labels
+
+
+def get_labels2id(is_two_layer):
+    """sorted to preserve the order"""
+    labels = get_all_labels(is_two_layer)
+    return {label: i for i, label in enumerate(labels)}
+
+
+def get_all_paths(is_two_layer):
+    labels2id = get_labels2id(is_two_layer)
+    layered_labels = get_layered_labels(is_two_layer)
+    layered_labels_pos = []
+    # Filter out the 'n/a' class.
+    # We assume that texts with such a label have only the 2 coarser layers as a valid label path,
+    # which is already included in the valid-path constraint.
+    for target in layered_labels:
+        target_pos = []
+        for target_word in target:
+            if target_word == 'n/a':
+                continue
+            else:
+                target_pos.append(labels2id[target_word])
+        if target_pos not in layered_labels_pos:
+            layered_labels_pos.append(target_pos)
+    array_dim = len(labels2id)
+    path_matrix = np.zeros((array_dim, array_dim, array_dim))
+    for path in layered_labels_pos:
+        if len(path) > 2:
+            path_matrix[path[0], path[1], path[2]] = 1
+    return torch.Tensor(path_matrix).to(device)
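For reference, a small self-contained sketch of the path-constraint tensor that get_all_paths builds, using a made-up two-path taxonomy instead of the RAMS training file (the labels and paths below are assumptions for illustration only):

import numpy as np

# Hypothetical "coarse.fine.finest" label paths, not the real RAMS event types.
paths = ["conflict.attack.airstrike", "contact.meet.n/a"]
labels = sorted({part for p in paths for part in p.split('.') if part != 'n/a'})
labels2id = {label: i for i, label in enumerate(labels)}

dim = len(labels2id)
path_matrix = np.zeros((dim, dim, dim))
for p in paths:
    ids = [labels2id[part] for part in p.split('.') if part != 'n/a']
    if len(ids) > 2:  # only complete three-level paths mark a valid entry
        path_matrix[ids[0], ids[1], ids[2]] = 1

# Only the entry for conflict -> attack -> airstrike is set to 1; the path ending in 'n/a'
# contributes nothing, mirroring the filtering inside get_all_paths.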