Princess3 committed on
Commit
d5730cd
·
verified ·
1 Parent(s): bf51013

Update model.py

Browse files
Files changed (1) hide show
  1. model.py +16 -37
model.py CHANGED
@@ -6,12 +6,13 @@ import torch.nn.functional as F
6
  from typing import List, Dict, Any, Optional
7
  from collections import defaultdict
8
  from accelerate import Accelerator
 
 
9
 
10
  class DynamicModel(nn.Module):
11
  def __init__(self, sections: Dict[str, List[Dict[str, Any]]]):
12
  super(DynamicModel, self).__init__()
13
  self.sections = nn.ModuleDict()
14
-
15
  if not sections:
16
  sections = {
17
  'default': [{
@@ -22,7 +23,6 @@ class DynamicModel(nn.Module):
22
  'dropout': 0.1
23
  }]
24
  }
25
-
26
  for section_name, layers in sections.items():
27
  self.sections[section_name] = nn.ModuleList()
28
  for layer_params in layers:
@@ -32,10 +32,8 @@ class DynamicModel(nn.Module):
32
  def create_layer(self, layer_params: Dict[str, Any]) -> nn.Module:
33
  layers = []
34
  layers.append(nn.Linear(layer_params['input_size'], layer_params['output_size']))
35
-
36
  if layer_params.get('batch_norm', False):
37
  layers.append(nn.BatchNorm1d(layer_params['output_size']))
38
-
39
  activation = layer_params.get('activation', 'relu')
40
  if activation == 'relu':
41
  layers.append(nn.ReLU(inplace=True))
@@ -49,23 +47,17 @@ class DynamicModel(nn.Module):
49
  layers.append(nn.ELU(alpha=1.0, inplace=True))
50
  elif activation is not None:
51
  raise ValueError(f"Unsupported activation function: {activation}")
52
-
53
  if dropout_rate := layer_params.get('dropout', 0.0):
54
  layers.append(nn.Dropout(p=dropout_rate))
55
-
56
  if hidden_layers := layer_params.get('hidden_layers', []):
57
  for hidden_layer_params in hidden_layers:
58
  layers.append(self.create_layer(hidden_layer_params))
59
-
60
  if layer_params.get('memory_augmentation', True):
61
  layers.append(MemoryAugmentationLayer(layer_params['output_size']))
62
-
63
  if layer_params.get('hybrid_attention', True):
64
  layers.append(HybridAttentionLayer(layer_params['output_size']))
65
-
66
  if layer_params.get('dynamic_flash_attention', True):
67
  layers.append(DynamicFlashAttentionLayer(layer_params['output_size']))
68
-
69
  return nn.Sequential(*layers)
70
 
71
  def forward(self, x: torch.Tensor, section_name: Optional[str] = None) -> torch.Tensor:
@@ -111,38 +103,30 @@ class DynamicFlashAttentionLayer(nn.Module):
111
  def parse_xml_file(file_path: str) -> List[Dict[str, Any]]:
112
  tree = ET.parse(file_path)
113
  root = tree.getroot()
114
-
115
  layers = []
116
  for layer in root.findall('.//layer'):
117
  layer_params = {}
118
  layer_params['input_size'] = int(layer.get('input_size', 128))
119
  layer_params['output_size'] = int(layer.get('output_size', 256))
120
  layer_params['activation'] = layer.get('activation', 'relu').lower()
121
-
122
  if layer_params['activation'] not in ['relu', 'tanh', 'sigmoid', 'none']:
123
  raise ValueError(f"Unsupported activation function: {layer_params['activation']}")
124
-
125
  if layer_params['input_size'] <= 0 or layer_params['output_size'] <= 0:
126
  raise ValueError("Layer dimensions must be positive integers")
127
-
128
  layers.append(layer_params)
129
-
130
  if not layers:
131
  layers.append({
132
  'input_size': 128,
133
  'output_size': 256,
134
  'activation': 'relu'
135
  })
136
-
137
  return layers
138
 
139
  def create_model_from_folder(folder_path: str) -> DynamicModel:
140
  sections = defaultdict(list)
141
-
142
  if not os.path.exists(folder_path):
143
  print(f"Warning: Folder {folder_path} does not exist. Creating model with default configuration.")
144
  return DynamicModel({})
145
-
146
  xml_files_found = False
147
  for root, dirs, files in os.walk(folder_path):
148
  for file in files:
@@ -155,64 +139,59 @@ def create_model_from_folder(folder_path: str) -> DynamicModel:
155
  sections[section_name].extend(layers)
156
  except Exception as e:
157
  print(f"Error processing {file_path}: {str(e)}")
158
-
159
  if not xml_files_found:
160
  print("Warning: No XML files found. Creating model with default configuration.")
161
  return DynamicModel({})
162
-
163
  return DynamicModel(dict(sections))
164
 
165
  def main():
166
  folder_path = 'data'
167
  model = create_model_from_folder(folder_path)
168
-
169
  print(f"Created dynamic PyTorch model with sections: {list(model.sections.keys())}")
170
-
171
  # Print the model architecture
172
  print(model)
173
-
174
  first_section = next(iter(model.sections.keys()))
175
  first_layer = model.sections[first_section][0]
176
  input_features = first_layer[0].in_features
177
-
178
  # Ensure the input tensor size matches the expected input size
179
  sample_input = torch.randn(1, input_features)
180
  output = model(sample_input)
181
  print(f"Sample output shape: {output.shape}")
182
 
183
  accelerator = Accelerator()
184
-
185
  optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
186
  criterion = nn.CrossEntropyLoss()
187
  num_epochs = 10
188
-
189
- dataset = torch.utils.data.TensorDataset(
190
  torch.randn(100, input_features),
191
  torch.randint(0, 2, (100,))
192
  )
193
- train_dataloader = torch.utils.data.DataLoader(
194
- dataset,
195
- batch_size=16,
196
  shuffle=True
197
  )
198
 
199
  model, optimizer, train_dataloader = accelerator.prepare(
200
- model,
201
- optimizer,
202
  train_dataloader
203
  )
204
 
 
 
205
  for epoch in range(num_epochs):
206
  model.train()
207
  total_loss = 0
208
  for batch_idx, (inputs, labels) in enumerate(train_dataloader):
209
  optimizer.zero_grad()
210
- outputs = model(inputs)
211
- loss = criterion(outputs, labels)
212
- accelerator.backward(loss)
213
- optimizer.step()
 
 
214
  total_loss += loss.item()
215
-
216
  avg_loss = total_loss / len(train_dataloader)
217
  print(f"Epoch {epoch+1}/{num_epochs}, Average Loss: {avg_loss:.4f}")
218
 
 
6
  from typing import List, Dict, Any, Optional
7
  from collections import defaultdict
8
  from accelerate import Accelerator
9
+ from torch.utils.data import DataLoader, TensorDataset
10
+ from torch.cuda.amp import GradScaler, autocast
11
 
12
  class DynamicModel(nn.Module):
13
  def __init__(self, sections: Dict[str, List[Dict[str, Any]]]):
14
  super(DynamicModel, self).__init__()
15
  self.sections = nn.ModuleDict()
 
16
  if not sections:
17
  sections = {
18
  'default': [{
 
23
  'dropout': 0.1
24
  }]
25
  }
 
26
  for section_name, layers in sections.items():
27
  self.sections[section_name] = nn.ModuleList()
28
  for layer_params in layers:
 
32
  def create_layer(self, layer_params: Dict[str, Any]) -> nn.Module:
33
  layers = []
34
  layers.append(nn.Linear(layer_params['input_size'], layer_params['output_size']))
 
35
  if layer_params.get('batch_norm', False):
36
  layers.append(nn.BatchNorm1d(layer_params['output_size']))
 
37
  activation = layer_params.get('activation', 'relu')
38
  if activation == 'relu':
39
  layers.append(nn.ReLU(inplace=True))
 
47
  layers.append(nn.ELU(alpha=1.0, inplace=True))
48
  elif activation is not None:
49
  raise ValueError(f"Unsupported activation function: {activation}")
 
50
  if dropout_rate := layer_params.get('dropout', 0.0):
51
  layers.append(nn.Dropout(p=dropout_rate))
 
52
  if hidden_layers := layer_params.get('hidden_layers', []):
53
  for hidden_layer_params in hidden_layers:
54
  layers.append(self.create_layer(hidden_layer_params))
 
55
  if layer_params.get('memory_augmentation', True):
56
  layers.append(MemoryAugmentationLayer(layer_params['output_size']))
 
57
  if layer_params.get('hybrid_attention', True):
58
  layers.append(HybridAttentionLayer(layer_params['output_size']))
 
59
  if layer_params.get('dynamic_flash_attention', True):
60
  layers.append(DynamicFlashAttentionLayer(layer_params['output_size']))
 
61
  return nn.Sequential(*layers)
62
 
63
  def forward(self, x: torch.Tensor, section_name: Optional[str] = None) -> torch.Tensor:
 
103
  def parse_xml_file(file_path: str) -> List[Dict[str, Any]]:
104
  tree = ET.parse(file_path)
105
  root = tree.getroot()
 
106
  layers = []
107
  for layer in root.findall('.//layer'):
108
  layer_params = {}
109
  layer_params['input_size'] = int(layer.get('input_size', 128))
110
  layer_params['output_size'] = int(layer.get('output_size', 256))
111
  layer_params['activation'] = layer.get('activation', 'relu').lower()
 
112
  if layer_params['activation'] not in ['relu', 'tanh', 'sigmoid', 'none']:
113
  raise ValueError(f"Unsupported activation function: {layer_params['activation']}")
 
114
  if layer_params['input_size'] <= 0 or layer_params['output_size'] <= 0:
115
  raise ValueError("Layer dimensions must be positive integers")
 
116
  layers.append(layer_params)
 
117
  if not layers:
118
  layers.append({
119
  'input_size': 128,
120
  'output_size': 256,
121
  'activation': 'relu'
122
  })
 
123
  return layers
124
 
125
  def create_model_from_folder(folder_path: str) -> DynamicModel:
126
  sections = defaultdict(list)
 
127
  if not os.path.exists(folder_path):
128
  print(f"Warning: Folder {folder_path} does not exist. Creating model with default configuration.")
129
  return DynamicModel({})
 
130
  xml_files_found = False
131
  for root, dirs, files in os.walk(folder_path):
132
  for file in files:
 
139
  sections[section_name].extend(layers)
140
  except Exception as e:
141
  print(f"Error processing {file_path}: {str(e)}")
 
142
  if not xml_files_found:
143
  print("Warning: No XML files found. Creating model with default configuration.")
144
  return DynamicModel({})
 
145
  return DynamicModel(dict(sections))
146
 
147
  def main():
148
  folder_path = 'data'
149
  model = create_model_from_folder(folder_path)
 
150
  print(f"Created dynamic PyTorch model with sections: {list(model.sections.keys())}")
 
151
  # Print the model architecture
152
  print(model)
 
153
  first_section = next(iter(model.sections.keys()))
154
  first_layer = model.sections[first_section][0]
155
  input_features = first_layer[0].in_features
 
156
  # Ensure the input tensor size matches the expected input size
157
  sample_input = torch.randn(1, input_features)
158
  output = model(sample_input)
159
  print(f"Sample output shape: {output.shape}")
160
 
161
  accelerator = Accelerator()
 
162
  optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
163
  criterion = nn.CrossEntropyLoss()
164
  num_epochs = 10
165
+ dataset = TensorDataset(
 
166
  torch.randn(100, input_features),
167
  torch.randint(0, 2, (100,))
168
  )
169
+ train_dataloader = DataLoader(
170
+ dataset,
171
+ batch_size=8, # Reduced batch size
172
  shuffle=True
173
  )
174
 
175
  model, optimizer, train_dataloader = accelerator.prepare(
176
+ model,
177
+ optimizer,
178
  train_dataloader
179
  )
180
 
181
+ scaler = GradScaler() # Mixed precision training
182
+
183
  for epoch in range(num_epochs):
184
  model.train()
185
  total_loss = 0
186
  for batch_idx, (inputs, labels) in enumerate(train_dataloader):
187
  optimizer.zero_grad()
188
+ with autocast(): # Mixed precision training
189
+ outputs = model(inputs)
190
+ loss = criterion(outputs, labels)
191
+ scaler.scale(loss).backward()
192
+ scaler.step(optimizer)
193
+ scaler.update()
194
  total_loss += loss.item()
 
195
  avg_loss = total_loss / len(train_dataloader)
196
  print(f"Epoch {epoch+1}/{num_epochs}, Average Loss: {avg_loss:.4f}")
197