Prositron committed
Commit 0ae85e9 · verified · 1 Parent(s): e56b819

Update train_model.py

Files changed (1)
  1. train_model.py +103 -103
train_model.py CHANGED
@@ -1,103 +1,103 @@
- import torch
- import torch.nn as nn
- import torch.optim as optim
- from torch.utils.data import DataLoader
- from datasets import load_dataset
- from transformers import AutoTokenizer
- from tensor_network import FourDimensionalTransformer # Adjust based on your model's location
-
- # List of dataset identifiers
- dataset_ids = [
-     "prithivMLmods/Deepthink-Reasoning",
-     "ewok-core/ewok-core-1.0",
-     "MuskumPillerum/General-Knowledge",
-     "fblgit/tree-of-knowledge",
-     "CohereForAI/aya_dataset",
-     "AtlasUnified/Atlas-Reasoning",
-     "livebench/reasoning",
-     "SkunkworksAI/reasoning-0.01",
-     "KingNish/reasoning-base-20k",
-     "RLHFlow/HH-RLHF-Helpful-standard",
-     "MBZUAI/ArabicMMLU"
- ]
-
- # Load datasets
- datasets = [load_dataset(dataset_id) for dataset_id in dataset_ids]
-
- # Initialize tokenizer
- tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') # Replace with your model's tokenizer
-
- # Tokenize datasets
- def tokenize_function(examples):
-     return tokenizer(examples['text'], padding='max_length', truncation=True, max_length=128)
-
- tokenized_datasets = [dataset.map(tokenize_function, batched=True) for dataset in datasets]
-
-
- # Prepare DataLoader
- def prepare_dataloader(dataset, batch_size=32):
-     dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])
-     return DataLoader(dataset, batch_size=batch_size, shuffle=True)
-
- train_dataloaders = [prepare_dataloader(dataset['train']) for dataset in tokenized_datasets]
- val_dataloaders = [prepare_dataloader(dataset['validation']) for dataset in tokenized_datasets]
-
-
- # Model setup
- model = FourDimensionalTransformer(
-     num_layers=16,
-     embed_dim=7,
-     num_heads=1,
-     num_extra_tokens=16,
-     num_classes=10 # Adjust based on your specific task
- )
-
- # Loss function and optimizer
- criterion = nn.CrossEntropyLoss()
- optimizer = optim.Adam(model.parameters(), lr=1e-4) # Using Adam optimizer with a learning rate of 1e-4
-
- # Training loop
- def train(model, train_dataloaders, val_dataloaders, num_epochs=10):
-     for epoch in range(num_epochs):
-         model.train()
-         total_loss = 0
-         for dataloader in train_dataloaders:
-             for batch in dataloader:
-                 input_ids = batch['input_ids']
-                 attention_mask = batch['attention_mask']
-                 labels = batch['label']
-
-                 optimizer.zero_grad()
-                 outputs = model(input_ids, attention_mask)
-                 loss = criterion(outputs, labels)
-                 loss.backward()
-                 optimizer.step()
-
-                 total_loss += loss.item()
-
-         avg_loss = total_loss / len(dataloader)
-         print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}')
-
-         # Validation
-         model.eval()
-         total_correct = 0
-         with torch.no_grad():
-             for dataloader in val_dataloaders:
-                 for batch in dataloader:
-                     input_ids = batch['input_ids']
-                     attention_mask = batch['attention_mask']
-                     labels = batch['label']
-
-                     outputs = model(input_ids, attention_mask)
-                     _, predicted = torch.max(outputs, 1)
-                     total_correct += (predicted == labels).sum().item()
-
-         accuracy = total_correct / len(dataloader.dataset)
-         print(f'Validation Accuracy: {accuracy:.4f}')
-
-     # Save the trained model
-     torch.save(model.state_dict(), 'trained_model.pth')
-
-
- # Train the model
- train(model, train_dataloaders, val_dataloaders)
 
+ import torch
+ import torch.nn as nn
+ import torch.optim as optim
+ from torch.utils.data import DataLoader
+ from datasets import load_dataset
+ from transformers import AutoTokenizer
+ from tensor_network import FourDimensionalTransformer # Adjust based on your model's location
+
+ # List of dataset identifiers
+ dataset_ids = [
+     "prithivMLmods/Deepthink-Reasoning",
+     "ewok-core/ewok-core-1.0",
+     "MuskumPillerum/General-Knowledge",
+     "fblgit/tree-of-knowledge",
+     "CohereForAI/aya_dataset",
+     "AtlasUnified/Atlas-Reasoning",
+     "livebench/reasoning",
+     "SkunkworksAI/reasoning-0.01",
+     "KingNish/reasoning-base-20k",
+     "RLHFlow/HH-RLHF-Helpful-standard",
+     "yitingxie/rlhf-reward-datasets"
+ ]
+
+ # Load datasets
+ datasets = [load_dataset(dataset_id) for dataset_id in dataset_ids]
+
+ # Initialize tokenizer
+ tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') # Replace with your model's tokenizer
+
+ # Tokenize datasets
+ def tokenize_function(examples):
+     return tokenizer(examples['text'], padding='max_length', truncation=True, max_length=128)
+
+ tokenized_datasets = [dataset.map(tokenize_function, batched=True) for dataset in datasets]
+
+
+ # Prepare DataLoader
+ def prepare_dataloader(dataset, batch_size=32):
+     dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])
+     return DataLoader(dataset, batch_size=batch_size, shuffle=True)
+
+ train_dataloaders = [prepare_dataloader(dataset['train']) for dataset in tokenized_datasets]
+ val_dataloaders = [prepare_dataloader(dataset['validation']) for dataset in tokenized_datasets]
+
+
+ # Model setup
+ model = FourDimensionalTransformer(
+     num_layers=16,
+     embed_dim=7,
+     num_heads=1,
+     num_extra_tokens=16,
+     num_classes=10 # Adjust based on your specific task
+ )
+
+ # Loss function and optimizer
+ criterion = nn.CrossEntropyLoss()
+ optimizer = optim.Adam(model.parameters(), lr=1e-4) # Using Adam optimizer with a learning rate of 1e-4
+
+ # Training loop
+ def train(model, train_dataloaders, val_dataloaders, num_epochs=10):
+     for epoch in range(num_epochs):
+         model.train()
+         total_loss = 0
+         for dataloader in train_dataloaders:
+             for batch in dataloader:
+                 input_ids = batch['input_ids']
+                 attention_mask = batch['attention_mask']
+                 labels = batch['label']
+
+                 optimizer.zero_grad()
+                 outputs = model(input_ids, attention_mask)
+                 loss = criterion(outputs, labels)
+                 loss.backward()
+                 optimizer.step()
+
+                 total_loss += loss.item()
+
+         avg_loss = total_loss / len(dataloader)
+         print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}')
+
+         # Validation
+         model.eval()
+         total_correct = 0
+         with torch.no_grad():
+             for dataloader in val_dataloaders:
+                 for batch in dataloader:
+                     input_ids = batch['input_ids']
+                     attention_mask = batch['attention_mask']
+                     labels = batch['label']
+
+                     outputs = model(input_ids, attention_mask)
+                     _, predicted = torch.max(outputs, 1)
+                     total_correct += (predicted == labels).sum().item()
+
+         accuracy = total_correct / len(dataloader.dataset)
+         print(f'Validation Accuracy: {accuracy:.4f}')
+
+     # Save the trained model
+     torch.save(model.state_dict(), 'trained_model.pth')
+
+
+ # Train the model
+ train(model, train_dataloaders, val_dataloaders)
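
Two aggregation slips in the loop above are worth flagging: `dataloader` leaks out of the inner loops, so both `avg_loss` and `accuracy` end up divided by the size of whichever dataloader happened to come last, not by the totals across all datasets. A minimal corrected sketch follows; it is not part of the commit, the `num_batches` and `total_examples` counters are additions of mine, and it keeps the script's own assumption that every batch carries `input_ids`, `attention_mask`, and `label`.

def train(model, train_dataloaders, val_dataloaders, num_epochs=10):
    for epoch in range(num_epochs):
        model.train()
        total_loss, num_batches = 0.0, 0  # count batches across *all* dataloaders
        for dataloader in train_dataloaders:
            for batch in dataloader:
                optimizer.zero_grad()
                outputs = model(batch['input_ids'], batch['attention_mask'])
                loss = criterion(outputs, batch['label'])
                loss.backward()
                optimizer.step()
                total_loss += loss.item()
                num_batches += 1

        # Average over every batch seen this epoch, not just the last dataloader
        avg_loss = total_loss / max(num_batches, 1)
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}')

        # Validation over the combined validation sets
        model.eval()
        total_correct, total_examples = 0, 0
        with torch.no_grad():
            for dataloader in val_dataloaders:
                for batch in dataloader:
                    outputs = model(batch['input_ids'], batch['attention_mask'])
                    predicted = outputs.argmax(dim=1)
                    total_correct += (predicted == batch['label']).sum().item()
                    total_examples += batch['label'].size(0)

        # Pooled accuracy, not last-dataset accuracy
        accuracy = total_correct / max(total_examples, 1)
        print(f'Validation Accuracy: {accuracy:.4f}')

    torch.save(model.state_dict(), 'trained_model.pth')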
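Separately, not every repository in `dataset_ids` ships a `validation` split (several are instruction- or preference-style sets), so the `dataset['validation']` indexing can raise a KeyError before training starts. One hedged way to guard the DataLoader construction, assuming the rest of the pipeline stays as committed; `split_or_fallback` and `fallback_fraction` are hypothetical names, not part of the script:

def split_or_fallback(dataset_dict, split='validation', fallback_fraction=0.1, seed=42):
    # Use the requested split when present; otherwise carve a held-out
    # slice from 'train' with datasets' built-in train_test_split.
    if split in dataset_dict:
        return dataset_dict[split]
    held_out = dataset_dict['train'].train_test_split(test_size=fallback_fraction, seed=seed)
    return held_out['test']

val_dataloaders = [prepare_dataloader(split_or_fallback(dataset)) for dataset in tokenized_datasets]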