Testys committed on
Commit
5af3f8a
1 Parent(s): 30b1f4c

changes based on stuffs

Browse files
Files changed (1) hide show
  1. main.py +25 -20
main.py CHANGED
@@ -7,19 +7,26 @@ import pandas as pd
7
  import altair as alt
8
 
9
  # Load the Yoruba NER model
10
- ner_model_name = "./my_model/pytorch_model.bin"
11
- model_ner = "Testys/cnn_yor_ner"
12
- ner_tokenizer = AutoTokenizer.from_pretrained(model_ner)
13
- with open("./my_model/config.json", "r") as f:
14
- ner_config = json.load(f)
15
-
16
- ner_model = CNNForNER(
17
- pretrained_model_name=ner_config["pretrained_model_name"],
18
- num_classes=ner_config["num_classes"]
19
- )
20
- ner_model.load_state_dict(torch.load(ner_model_name, map_location=torch.device('cpu')))
 
 
 
 
 
 
21
  ner_model.eval()
22
 
 
23
  # Load the Yoruba sentiment analysis model
24
  sentiment_model_name = "./sent_model/sent_pytorch_model.bin"
25
  model_sent = "Testys/cnn_sent_yor"
@@ -39,21 +46,19 @@ sentiment_model.eval()
39
 
40
  def analyze_text(text):
41
  # Tokenize input text for NER
42
- ner_inputs = ner_tokenizer(text, max_length= 514, truncation= True, padding= "max_length", return_tensors="pt")
43
-
44
- input_ids = ner_inputs['input_ids']
45
-
46
- # Converting token IDs back to tokens
47
- tokens = [ner_tokenizer.convert_ids_to_tokens(id) for id in input_ids.squeeze().tolist()]
48
-
49
 
50
  # Perform Named Entity Recognition
 
51
  with torch.no_grad():
52
  ner_outputs = ner_model(**ner_inputs)
53
 
54
- ner_predictions = torch.argmax(ner_outputs, dim=-1)[0]
 
 
55
  ner_labels = ner_predictions.tolist()
56
- ner_labels = [ner_config["id2labels"][str(label)] for label in ner_labels]
 
57
 
58
  #matching the tokens with the labels
59
  ner_labels = [f"{token}: {label}" for token, label in zip(tokens, ner_labels)]
 
7
  import altair as alt
8
 
9
  # Load the Yoruba NER model
10
+ # ner_model_name = "./my_model/pytorch_model.bin"
11
+ # model_ner = "Testys/cnn_yor_ner"
12
+ # ner_tokenizer = AutoTokenizer.from_pretrained(model_ner)
13
+ # with open("./my_model/config.json", "r") as f:
14
+ # ner_config = json.load(f)
15
+
16
+ # ner_model = CNNForNER(
17
+ # pretrained_model_name=ner_config["pretrained_model_name"],
18
+ # num_classes=ner_config["num_classes"]
19
+ # )
20
+ # ner_model.load_state_dict(torch.load(ner_model_name, map_location=torch.device('cpu')))
21
+ # ner_model.eval()
22
+
23
+ ner_model = AutoModelForTokenClassification.from_pretrained("masakhane/afroxlmr-large-ner-masakhaner-1.0_2.0")
24
+ ner_tokenizers = AutoTokenizer.from_pretrained("masakhane/afroxlmr-large-ner-masakhaner-1.0_2.0")
25
+ ner_config = ner_model.config
26
+
27
  ner_model.eval()
28
 
29
+
30
  # Load the Yoruba sentiment analysis model
31
  sentiment_model_name = "./sent_model/sent_pytorch_model.bin"
32
  model_sent = "Testys/cnn_sent_yor"
 
46
 
47
  def analyze_text(text):
48
  # Tokenize input text for NER
49
+ ner_inputs = ner_tokenizer(text, return_tensors="pt")
 
 
 
 
 
 
50
 
51
  # Perform Named Entity Recognition
52
+ tokens = ner_tokenizer.convert_ids_to_tokens(ner_inputs.input_ids[0])
53
  with torch.no_grad():
54
  ner_outputs = ner_model(**ner_inputs)
55
 
56
+ print(ner_outputs)
57
+
58
+ ner_predictions = torch.argmax(ner_outputs.logits, dim=-1)[0]
59
  ner_labels = ner_predictions.tolist()
60
+ print(ner_labels)
61
+ ner_labels = [ner_config.id2label[label] for label in ner_labels]
62
 
63
  #matching the tokens with the labels
64
  ner_labels = [f"{token}: {label}" for token, label in zip(tokens, ner_labels)]