Spaces:

Testys
/

YorubaCNN

Sleeping

App Files Community

Testys committed on Aug 28

Commit

31cea2f

•

1 Parent(s): 2210a0e

Upload main.py

Browse files

Files changed (1) hide show

main.py +30 -40

main.py CHANGED Viewed

@@ -36,50 +36,40 @@ sentiment_model = SentimentCNNModel(
 sentiment_model.load_state_dict(torch.load(sentiment_model_name, map_location=torch.device('cpu')))
 sentiment_model.eval()
-def analyze_text(text, window_size=512, stride=256):
-    # Run NER and sentiment analysis over sliding windows of `text` and return
-    # (all_ner_labels, final_sentiment): the "token: label" strings collected
-    # from every window, and the sentiment label predicted for the largest
-    # number of windows.
-    # `window_size` is used twice: as the slice length of each window taken
-    # from `text`, and as the max_length handed to both tokenizers.
-    # NOTE(review): the slice below indexes `text` directly, so if `text` is a
-    # str the window is measured in characters while max_length is measured in
-    # tokens -- confirm this is intended.
-    # Initialize results
-    all_ner_labels = []
-    all_sentiments = []
-    # Process text in windows; a new window starts every `stride` positions.
-    for i in range(0, len(text), stride):
-        window = text[i:i+window_size]
-        # Tokenize input text for NER
-        ner_inputs = ner_tokenizer(window, return_tensors="pt", truncation=True, padding=True, max_length=window_size)
-        input_ids = ner_inputs['input_ids']
-        # Convert every token id back to its token string.
-        # NOTE(review): squeeze() drops all size-1 dimensions; if an encoding
-        # ever held a single id, tolist() would yield a bare int and this
-        # iteration would fail -- confirm the tokenizer always emits more
-        # than one id.
-        tokens = [ner_tokenizer.convert_ids_to_tokens(id) for id in input_ids.squeeze().tolist()]
-        # Perform Named Entity Recognition
-        with torch.no_grad():
-            ner_outputs = ner_model(**ner_inputs)
-        # The model output is passed straight to argmax over its last
-        # dimension, and only index 0 of the result is kept.
-        # assumes ner_model returns a raw score tensor shaped
-        # (batch, sequence, labels) -- TODO confirm
-        ner_predictions = torch.argmax(ner_outputs, dim=-1)[0]
-        ner_labels = ner_predictions.tolist()
-        # Label ids are looked up by their string form under "id2labels".
-        ner_labels = [ner_config["id2labels"][str(label)] for label in ner_labels]
-        # Pair each token with its label; zip stops at the shorter sequence.
-        ner_labels = [f"{token}: {label}" for token, label in zip(tokens, ner_labels)]
-        # With the default stride (256) smaller than window_size (512) the
-        # windows overlap, and the overlapping part is appended once per window.
-        all_ner_labels.extend(ner_labels)  # Adjust logic to merge overlapping windows appropriately
-        # Tokenize input text for sentiment analysis
-        sentiment_inputs = sentiment_tokenizer(window, return_tensors="pt", truncation=True, padding=True, max_length=window_size)
-        # Perform sentiment analysis
-        with torch.no_grad():
-            sentiment_outputs = sentiment_model(**sentiment_inputs)
-        # Despite its name, `sentiment_probabilities` holds the argmax indices
-        # along dim 1, not probabilities; the first index is the window's class.
-        sentiment_probabilities = torch.argmax(sentiment_outputs, dim=1)
-        sentiment_scores = sentiment_probabilities.tolist()
-        sentiment_id = sentiment_scores[0]
-        # Class ids are looked up by their string form under "id2label".
-        sentiment = sentiment_config["id2label"][str(sentiment_id)]
-        all_sentiments.append(sentiment)  # This needs logic to combine sentiment over windows
-    # For simplicity, aggregate sentiments by majority vote
-    from collections import Counter
-    sentiment_counts = Counter(all_sentiments)
-    # NOTE(review): for empty `text` the loop never runs, most_common(1)
-    # returns an empty list, and the [0] lookup raises IndexError -- confirm
-    # callers never pass empty input.
-    final_sentiment = sentiment_counts.most_common(1)[0][0]
-    return all_ner_labels, final_sentiment
 def main():
     st.set_page_config(page_title="YorubaCNN for NER and Sentiment Analysis", layout="wide")
@@ -149,4 +139,4 @@ def main():
         """, unsafe_allow_html=True)
 if __name__ == "__main__":
-    main()

 sentiment_model.load_state_dict(torch.load(sentiment_model_name, map_location=torch.device('cpu')))
 sentiment_model.eval()
+def analyze_text(text):
+    # Run NER and sentiment analysis on `text` in a single pass and return
+    # (ner_labels, sentiment): a list of "token: label" strings and the label
+    # looked up in sentiment_config["id2label"] for the predicted class.
+    # Tokenize input text for NER
+    # No truncation or max_length is passed, so the whole text is encoded as
+    # one sequence.
+    # NOTE(review): confirm ner_model accepts sequences of any length.
+    ner_inputs = ner_tokenizer(text, return_tensors="pt")
+    input_ids = ner_inputs['input_ids']
+    # Converting token IDs back to tokens
+    # NOTE(review): squeeze() drops all size-1 dimensions; if the encoding
+    # ever held a single id, tolist() would yield a bare int and this
+    # iteration would fail -- confirm the tokenizer always emits more than
+    # one id.
+    tokens = [ner_tokenizer.convert_ids_to_tokens(id) for id in input_ids.squeeze().tolist()]
+    # Perform Named Entity Recognition
+    with torch.no_grad():
+        ner_outputs = ner_model(**ner_inputs)
+    # The model output is passed straight to argmax over its last dimension,
+    # and only index 0 of the result is kept.
+    # assumes ner_model returns a raw score tensor shaped
+    # (batch, sequence, labels) -- TODO confirm
+    ner_predictions = torch.argmax(ner_outputs, dim=-1)[0]
+    ner_labels = ner_predictions.tolist()
+    # Label ids are looked up by their string form.
+    # NOTE(review): the key here is "id2labels" while the sentiment lookup
+    # below uses "id2label" -- confirm both against the config files.
+    ner_labels = [ner_config["id2labels"][str(label)] for label in ner_labels]
+    # Pair each token with its predicted label; zip stops at the shorter
+    # of the two sequences.
+    ner_labels = [f"{token}: {label}" for token, label in zip(tokens, ner_labels)]
+    # Tokenize input text for sentiment analysis
+    # As with the NER tokenizer call, no truncation or max_length is passed.
+    sentiment_inputs = sentiment_tokenizer(text, return_tensors="pt")
+    # Perform sentiment analysis
+    with torch.no_grad():
+        sentiment_outputs = sentiment_model(**sentiment_inputs)
+    # Despite its name, `sentiment_probabilities` holds the argmax indices
+    # along dim 1, not probabilities; the first index is the predicted class.
+    sentiment_probabilities = torch.argmax(sentiment_outputs, dim=1)
+    sentiment_scores = sentiment_probabilities.tolist()
+    sentiment_id = sentiment_scores[0]
+    sentiment = sentiment_config["id2label"][str(sentiment_id)]
+    return ner_labels, sentiment
 def main():
     st.set_page_config(page_title="YorubaCNN for NER and Sentiment Analysis", layout="wide")
         """, unsafe_allow_html=True)
 if __name__ == "__main__":
+    main()