Update model.py
model.py
CHANGED
@@ -6,12 +6,13 @@ import torch.nn.functional as F
 from typing import List, Dict, Any, Optional
 from collections import defaultdict
 from accelerate import Accelerator
+from torch.utils.data import DataLoader, TensorDataset
+from torch.cuda.amp import GradScaler, autocast
 
 class DynamicModel(nn.Module):
     def __init__(self, sections: Dict[str, List[Dict[str, Any]]]):
         super(DynamicModel, self).__init__()
         self.sections = nn.ModuleDict()
-
         if not sections:
             sections = {
                 'default': [{
@@ -22,7 +23,6 @@ class DynamicModel(nn.Module):
                     'dropout': 0.1
                 }]
             }
-
         for section_name, layers in sections.items():
             self.sections[section_name] = nn.ModuleList()
             for layer_params in layers:
@@ -32,10 +32,8 @@ class DynamicModel(nn.Module):
     def create_layer(self, layer_params: Dict[str, Any]) -> nn.Module:
         layers = []
         layers.append(nn.Linear(layer_params['input_size'], layer_params['output_size']))
-
         if layer_params.get('batch_norm', False):
             layers.append(nn.BatchNorm1d(layer_params['output_size']))
-
         activation = layer_params.get('activation', 'relu')
         if activation == 'relu':
             layers.append(nn.ReLU(inplace=True))
@@ -49,23 +47,17 @@ class DynamicModel(nn.Module):
             layers.append(nn.ELU(alpha=1.0, inplace=True))
         elif activation is not None:
             raise ValueError(f"Unsupported activation function: {activation}")
-
         if dropout_rate := layer_params.get('dropout', 0.0):
             layers.append(nn.Dropout(p=dropout_rate))
-
         if hidden_layers := layer_params.get('hidden_layers', []):
             for hidden_layer_params in hidden_layers:
                 layers.append(self.create_layer(hidden_layer_params))
-
         if layer_params.get('memory_augmentation', True):
             layers.append(MemoryAugmentationLayer(layer_params['output_size']))
-
         if layer_params.get('hybrid_attention', True):
             layers.append(HybridAttentionLayer(layer_params['output_size']))
-
         if layer_params.get('dynamic_flash_attention', True):
             layers.append(DynamicFlashAttentionLayer(layer_params['output_size']))
-
         return nn.Sequential(*layers)
 
     def forward(self, x: torch.Tensor, section_name: Optional[str] = None) -> torch.Tensor:
@@ -111,38 +103,30 @@ class DynamicFlashAttentionLayer(nn.Module):
 def parse_xml_file(file_path: str) -> List[Dict[str, Any]]:
     tree = ET.parse(file_path)
     root = tree.getroot()
-
     layers = []
     for layer in root.findall('.//layer'):
         layer_params = {}
         layer_params['input_size'] = int(layer.get('input_size', 128))
         layer_params['output_size'] = int(layer.get('output_size', 256))
         layer_params['activation'] = layer.get('activation', 'relu').lower()
-
         if layer_params['activation'] not in ['relu', 'tanh', 'sigmoid', 'none']:
             raise ValueError(f"Unsupported activation function: {layer_params['activation']}")
-
         if layer_params['input_size'] <= 0 or layer_params['output_size'] <= 0:
             raise ValueError("Layer dimensions must be positive integers")
-
         layers.append(layer_params)
-
     if not layers:
         layers.append({
             'input_size': 128,
             'output_size': 256,
             'activation': 'relu'
         })
-
     return layers
 
 def create_model_from_folder(folder_path: str) -> DynamicModel:
     sections = defaultdict(list)
-
     if not os.path.exists(folder_path):
         print(f"Warning: Folder {folder_path} does not exist. Creating model with default configuration.")
         return DynamicModel({})
-
     xml_files_found = False
     for root, dirs, files in os.walk(folder_path):
         for file in files:
@@ -155,64 +139,59 @@ def create_model_from_folder(folder_path: str) -> DynamicModel:
                     sections[section_name].extend(layers)
                 except Exception as e:
                     print(f"Error processing {file_path}: {str(e)}")
-
     if not xml_files_found:
         print("Warning: No XML files found. Creating model with default configuration.")
         return DynamicModel({})
-
     return DynamicModel(dict(sections))
 
 def main():
     folder_path = 'data'
     model = create_model_from_folder(folder_path)
-
     print(f"Created dynamic PyTorch model with sections: {list(model.sections.keys())}")
-
     # Print the model architecture
     print(model)
-
     first_section = next(iter(model.sections.keys()))
     first_layer = model.sections[first_section][0]
     input_features = first_layer[0].in_features
-
     # Ensure the input tensor size matches the expected input size
     sample_input = torch.randn(1, input_features)
     output = model(sample_input)
     print(f"Sample output shape: {output.shape}")
 
     accelerator = Accelerator()
-
     optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
     criterion = nn.CrossEntropyLoss()
     num_epochs = 10
-
-    dataset = torch.utils.data.TensorDataset(
+    dataset = TensorDataset(
         torch.randn(100, input_features),
         torch.randint(0, 2, (100,))
     )
-    train_dataloader =
-        dataset,
-        batch_size=
+    train_dataloader = DataLoader(
+        dataset,
+        batch_size=8,  # Reduced batch size
         shuffle=True
     )
 
     model, optimizer, train_dataloader = accelerator.prepare(
-        model,
-        optimizer,
+        model,
+        optimizer,
         train_dataloader
     )
 
+    scaler = GradScaler()  # Mixed precision training
+
     for epoch in range(num_epochs):
         model.train()
         total_loss = 0
         for batch_idx, (inputs, labels) in enumerate(train_dataloader):
             optimizer.zero_grad()
-
-
-
-
+            with autocast():  # Mixed precision training
+                outputs = model(inputs)
+                loss = criterion(outputs, labels)
+            scaler.scale(loss).backward()
+            scaler.step(optimizer)
+            scaler.update()
             total_loss += loss.item()
-
         avg_loss = total_loss / len(train_dataloader)
         print(f"Epoch {epoch+1}/{num_epochs}, Average Loss: {avg_loss:.4f}")
 
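
For reference, a minimal sketch of the kind of XML config this file consumes. The <layer> tag and the input_size / output_size / activation attributes are taken from parse_xml_file in the diff above; the folder name, file name, and the 'model' root tag are illustrative assumptions only.

    import os
    import xml.etree.ElementTree as ET

    # Write data/default.xml with a single <layer> entry. Attribute values are
    # strings; parse_xml_file converts the sizes with int() and lower-cases the
    # activation, which must be one of: relu, tanh, sigmoid, none.
    os.makedirs('data', exist_ok=True)
    root = ET.Element('model')              # root tag is an assumption
    ET.SubElement(root, 'layer', {
        'input_size': '128',
        'output_size': '256',
        'activation': 'relu',
    })
    ET.ElementTree(root).write('data/default.xml')

    # create_model_from_folder() (defined in model.py above) walks the folder,
    # parses every .xml file it finds, and builds the DynamicModel from them.
    model = create_model_from_folder('data')
    print(model)

Each parsed entry becomes a Linear(input_size, output_size) block in create_layer, followed by the memory-augmentation and attention layers that default to on.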