import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForTokenClassification, AutoModel


class CNNForNER(nn.Module):
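    """Pretrained token-classification transformer followed by a two-layer
    1D CNN that re-maps the per-token logits to `num_classes` labels.
    """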
    def __init__(self, pretrained_model_name, num_classes, max_length=128):
        super(CNNForNER, self).__init__()
        self.transformer = AutoModelForTokenClassification.from_pretrained(pretrained_model_name)
        self.max_length = max_length

        # Get the number of labels from the pretrained model
        pretrained_num_labels = self.transformer.num_labels

        self.conv1 = nn.Conv1d(in_channels=pretrained_num_labels, out_channels=256, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(in_channels=256, out_channels=128, kernel_size=3, padding=1)
        self.dropout = nn.Dropout(0.3)
        self.fc = nn.Linear(in_features=128, out_features=num_classes)

    def forward(self, input_ids, attention_mask):
        outputs = self.transformer(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits  # Shape: (batch_size, sequence_length, pretrained_num_labels)

        # Apply CNN layers
        logits = logits.permute(0, 2, 1)  # Shape: (batch_size, pretrained_num_labels, sequence_length)
        conv1_out = F.relu(self.conv1(logits))
        conv2_out = F.relu(self.conv2(conv1_out))
        conv2_out = self.dropout(conv2_out)
        conv2_out = conv2_out.permute(0, 2, 1)  # Shape: (batch_size, sequence_length, 128)
        final_logits = self.fc(conv2_out)  # Shape: (batch_size, sequence_length, num_classes)
        return final_logits
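
# --- Hypothetical usage sketch (illustrative assumption, not part of the original file). ---
# The checkpoint name, label count, and sample sentence are placeholders;
# substitute the token-classification model and tag set you actually trained.
def _demo_cnn_ner():
    model_name = "dslim/bert-base-NER"  # assumed NER checkpoint
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = CNNForNER(pretrained_model_name=model_name, num_classes=9)
    model.eval()
    encoded = tokenizer(
        "Hugging Face is based in New York City.",
        return_tensors="pt",
        truncation=True,
        max_length=128,
    )
    with torch.no_grad():
        logits = model(encoded["input_ids"], encoded["attention_mask"])
    return logits.argmax(dim=-1)  # predicted tag id per token: (batch, seq_len)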



class SentimentCNNModel(nn.Module):
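    """Sentence-level classifier: transformer hidden states feed parallel
    Conv1d branches with different kernel sizes; the max-pooled features are
    concatenated, dropped out, and projected to `num_classes` logits.
    """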
    def __init__(self, transformer_model_name, num_classes, cnn_out_channels=100, cnn_kernel_sizes=(3, 5, 7)):
        super(SentimentCNNModel, self).__init__()
        # Load pre-trained transformer model
        self.transformer = AutoModel.from_pretrained(transformer_model_name)

        # CNN layers with multiple kernel sizes
        self.convs = nn.ModuleList([
            nn.Conv1d(in_channels=self.transformer.config.hidden_size,
                      out_channels=cnn_out_channels,
                      kernel_size=k)
            for k in cnn_kernel_sizes
        ])

        # Dropout layer
        self.dropout = nn.Dropout(0.5)

        # Fully connected layer
        self.fc = nn.Linear(len(cnn_kernel_sizes) * cnn_out_channels, num_classes)

    def forward(self, input_ids, attention_mask):
        # Get hidden states from the transformer model
        transformer_outputs = self.transformer(input_ids=input_ids, attention_mask=attention_mask)
        hidden_states = transformer_outputs.last_hidden_state  # Shape: (batch_size, seq_len, hidden_size)

        # Transpose for CNN input: (batch_size, hidden_size, seq_len)
        hidden_states = hidden_states.transpose(1, 2)

        # Apply convolution and pooling
        conv_outputs = [torch.relu(conv(hidden_states)) for conv in self.convs]
        pooled_outputs = [torch.max(output, dim=2)[0] for output in conv_outputs]

        # Concatenate pooled outputs and apply dropout
        cat_output = torch.cat(pooled_outputs, dim=1)
        cat_output = self.dropout(cat_output)

        # Final classification
        logits = self.fc(cat_output)

        return logits
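
# --- Hypothetical usage sketch (illustrative assumption, not part of the original file). ---
# The encoder checkpoint and class count are placeholders. Note that input
# sequences must be at least as long as the largest CNN kernel (7 here), or
# that Conv1d branch will fail on the too-short input.
def _demo_sentiment():
    model_name = "distilbert-base-uncased"  # assumed encoder checkpoint
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = SentimentCNNModel(transformer_model_name=model_name, num_classes=2)
    model.eval()
    encoded = tokenizer("This movie was surprisingly good!", return_tensors="pt")
    with torch.no_grad():
        logits = model(encoded["input_ids"], encoded["attention_mask"])
    return F.softmax(logits, dim=-1)  # class probabilities: (batch, num_classes)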

