from cnn import CNN,Model
from utils import loader_train
import torch

# Hyperparameters.
epochs = 5
learning_rate = 5e-4
# Train on the GPU when one is available; otherwise fall back to the CPU so
# the script still runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Network to train. Commented-out alternative kept for reference:
# model = CNN().to(device)
model = Model().to(device)
# AdamW optimizer over all model parameters, using the learning rate above.
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
# Each scheduler.step() multiplies the learning rate by gamma, so gamma=0.1
# cuts it by 90% per step.
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.1)

# Cross-entropy loss.
criterion = torch.nn.CrossEntropyLoss()

# Training loop: for every batch yielded by `loader_train`, run a forward pass,
# compute the loss, back-propagate and update the parameters; afterwards save
# the trained weights to disk.
lr_decay_epoch = 3  # 0-based epoch index at whose start the LR is decayed once
log_every = 1       # print batch metrics every `log_every` batches (1 = all)

model.train()
for epoch in range(epochs):
    # The scheduler is stepped exactly once, right before the epoch with index
    # `lr_decay_epoch` begins; every other epoch keeps the current LR.
    if epoch == lr_decay_epoch:
        scheduler.step()

    # Prints the 1-based epoch number (the label is a runtime string).
    print('轮数', epoch + 1)

    for i, (input_ids, attention_mask, token_type_ids, labels) in enumerate(loader_train):
        # Move the whole batch to the training device.
        input_ids = input_ids.to(device)
        attention_mask = attention_mask.to(device)
        token_type_ids = token_type_ids.to(device)
        labels = labels.to(device)

        # The trailing .to(device) is a no-op when the model already returns
        # its output on `device`; it is kept as a safeguard.
        out = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        ).to(device)
        loss = criterion(out, labels)
        loss.backward()
        optimizer.step()
        # Clear the gradients after the update so the next batch starts from
        # zero instead of accumulating.
        optimizer.zero_grad()

        if i % log_every == 0:
            # Predicted class = index of the largest score along dim 1
            # (assumes `out` is (batch, num_classes) — TODO confirm).
            preds = out.argmax(dim=1)
            # Count the hits once and reuse the value; each .item() call
            # forces a device-to-host synchronisation.
            correct = (preds == labels).sum().item()
            print(preds, labels, correct, len(labels))
            accuracy = correct / len(labels)
            print(i, loss.item(), accuracy)

# Commented-out alternative checkpoint name kept for reference:
# torch.save(model.state_dict(), 'net_params.pth')
torch.save(model.state_dict(), 'cls_params.pth')