Princess3 committed on
Commit 9a38883
1 Parent(s): cd02111

Upload model.py

Files changed (1)
  1. model.py +223 -26
model.py CHANGED
@@ -9,58 +9,242 @@ from accelerate import Accelerator
 
 class DynamicModel(nn.Module):
     def __init__(self, sections: Dict[str, List[Dict[str, Any]]]):
+        """
+        Initialize the DynamicModel with configurable neural network sections.
+
+        Args:
+            sections (Dict[str, List[Dict[str, Any]]]): Dictionary mapping section names to lists of layer configurations.
+                Each layer configuration is a dictionary containing:
+                - input_size (int): Size of input features
+                - output_size (int): Size of output features
+                - activation (str, optional): Activation function name ('relu', 'tanh', 'sigmoid', etc.)
+                - dropout (float, optional): Dropout rate
+                - batch_norm (bool, optional): Whether to use batch normalization
+                - hidden_layers (List[Dict[str, Any]], optional): List of hidden layer configurations
+                - memory_augmentation (bool, optional): Whether to add a memory augmentation layer
+                - hybrid_attention (bool, optional): Whether to add a hybrid attention layer
+                - dynamic_flash_attention (bool, optional): Whether to add a dynamic flash attention layer
+
+        Example:
+            sections = {
+                'encoder': [
+                    {'input_size': 128, 'output_size': 256, 'activation': 'relu', 'batch_norm': True},
+                    {'input_size': 256, 'output_size': 512, 'activation': 'leaky_relu', 'dropout': 0.1}
+                ],
+                'decoder': [
+                    {'input_size': 512, 'output_size': 256, 'activation': 'elu'},
+                    {'input_size': 256, 'output_size': 128, 'activation': 'tanh'}
+                ]
+            }
+        """
         super(DynamicModel, self).__init__()
         self.sections = nn.ModuleDict()
 
-        # Default section if none provided
+        # Default section configuration if none provided
         if not sections:
             sections = {
                 'default': [{
                     'input_size': 128,
                     'output_size': 256,
-                    'activation': 'relu'
+                    'activation': 'relu',
+                    'batch_norm': True,
+                    'dropout': 0.1
                 }]
             }
 
+        # Initialize each section with its layer configurations
         for section_name, layers in sections.items():
             self.sections[section_name] = nn.ModuleList()
             for layer_params in layers:
                 self.sections[section_name].append(self.create_layer(layer_params))
 
     def create_layer(self, layer_params: Dict[str, Any]) -> nn.Module:
-        layer = nn.Linear(layer_params['input_size'], layer_params['output_size'])
+        """
+        Creates a neural network layer based on provided parameters.
+
+        Args:
+            layer_params (Dict[str, Any]): Dictionary containing layer configuration
+                Required keys:
+                - input_size (int): Size of input features
+                - output_size (int): Size of output features
+                Optional keys:
+                - activation (str): Activation function name ('relu', 'tanh', 'sigmoid', None)
+                - dropout (float): Dropout rate if needed
+                - batch_norm (bool): Whether to use batch normalization
+                - hidden_layers (List[Dict[str, Any]]): List of hidden layer configurations
+                - memory_augmentation (bool): Whether to add a memory augmentation layer
+                - hybrid_attention (bool): Whether to add a hybrid attention layer
+                - dynamic_flash_attention (bool): Whether to add a dynamic flash attention layer
+
+        Returns:
+            nn.Module: Configured neural network layer with activation
+
+        Raises:
+            KeyError: If required parameters are missing
+            ValueError: If activation function is not supported
+        """
+        layers = []
+
+        # Add linear layer
+        layers.append(nn.Linear(layer_params['input_size'], layer_params['output_size']))
+
+        # Add batch normalization if specified
+        if layer_params.get('batch_norm', False):
+            layers.append(nn.BatchNorm1d(layer_params['output_size']))
+
+        # Add activation function
         activation = layer_params.get('activation', 'relu')
         if activation == 'relu':
-            return nn.Sequential(layer, nn.ReLU())
+            layers.append(nn.ReLU(inplace=True))
         elif activation == 'tanh':
-            return nn.Sequential(layer, nn.Tanh())
+            layers.append(nn.Tanh())
         elif activation == 'sigmoid':
-            return nn.Sequential(layer, nn.Sigmoid())
-        else:
-            return layer
+            layers.append(nn.Sigmoid())
+        elif activation == 'leaky_relu':
+            layers.append(nn.LeakyReLU(negative_slope=0.01, inplace=True))
+        elif activation == 'elu':
+            layers.append(nn.ELU(alpha=1.0, inplace=True))
+        elif activation is not None:
+            raise ValueError(f"Unsupported activation function: {activation}")
 
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        for section_name, layers in self.sections.items():
-            for layer in layers:
+        # Add dropout if specified
+        if dropout_rate := layer_params.get('dropout', 0.0):
+            layers.append(nn.Dropout(p=dropout_rate))
+
+        # Add hidden layers if specified
+        if hidden_layers := layer_params.get('hidden_layers', []):
+            for hidden_layer_params in hidden_layers:
+                layers.append(self.create_layer(hidden_layer_params))
+
+        # Add memory augmentation layer if specified
+        if layer_params.get('memory_augmentation', False):
+            layers.append(MemoryAugmentationLayer(layer_params['output_size']))
+
+        # Add hybrid attention layer if specified
+        if layer_params.get('hybrid_attention', False):
+            layers.append(HybridAttentionLayer(layer_params['output_size']))
+
+        # Add dynamic flash attention layer if specified
+        if layer_params.get('dynamic_flash_attention', False):
+            layers.append(DynamicFlashAttentionLayer(layer_params['output_size']))
+
+        return nn.Sequential(*layers)
+
+    def forward(self, x: torch.Tensor, section_name: Optional[str] = None) -> torch.Tensor:
+        """
+        Forward pass through the dynamic model architecture.
+
+        Args:
+            x (torch.Tensor): Input tensor to process
+            section_name (Optional[str]): Specific section to process. If None, processes all sections
+
+        Returns:
+            torch.Tensor: Processed output tensor
+
+        Raises:
+            KeyError: If specified section_name doesn't exist
+        """
+        if section_name is not None:
+            if section_name not in self.sections:
+                raise KeyError(f"Section '{section_name}' not found in model")
+            for layer in self.sections[section_name]:
                 x = layer(x)
+        else:
+            for section_name, layers in self.sections.items():
+                for layer in layers:
+                    x = layer(x)
         return x
 
+class MemoryAugmentationLayer(nn.Module):
+    def __init__(self, size: int):
+        super(MemoryAugmentationLayer, self).__init__()
+        self.memory = nn.Parameter(torch.randn(size))
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return x + self.memory
+
+class HybridAttentionLayer(nn.Module):
+    def __init__(self, size: int):
+        super(HybridAttentionLayer, self).__init__()
+        self.attention = nn.MultiheadAttention(size, num_heads=8)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = x.unsqueeze(1)  # Add sequence dimension
+        attn_output, _ = self.attention(x, x, x)
+        return attn_output.squeeze(1)
+
+class DynamicFlashAttentionLayer(nn.Module):
+    def __init__(self, size: int):
+        super(DynamicFlashAttentionLayer, self).__init__()
+        self.attention = nn.MultiheadAttention(size, num_heads=8)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = x.unsqueeze(1)  # Add sequence dimension
+        attn_output, _ = self.attention(x, x, x)
+        return attn_output.squeeze(1)
+
 def parse_xml_file(file_path: str) -> List[Dict[str, Any]]:
+    """
+    Parses an XML configuration file to extract layer parameters for neural network construction.
+
+    Args:
+        file_path (str): Path to the XML configuration file
+
+    Returns:
+        List[Dict[str, Any]]: List of dictionaries containing layer configurations
+
+    Raises:
+        ET.ParseError: If XML file is malformed
+        KeyError: If required attributes are missing in XML
+    """
     tree = ET.parse(file_path)
     root = tree.getroot()
 
     layers = []
-    for prov in root.findall('.//prov'):
-        layer_params = {
+    for layer in root.findall('.//layer'):
+        layer_params = {}
+        layer_params['input_size'] = int(layer.get('input_size', 128))
+        layer_params['output_size'] = int(layer.get('output_size', 256))
+        layer_params['activation'] = layer.get('activation', 'relu').lower()
+
+        # Validate activation function
+        if layer_params['activation'] not in ['relu', 'tanh', 'sigmoid', 'none']:
+            raise ValueError(f"Unsupported activation function: {layer_params['activation']}")
+
+        # Validate dimensions
+        if layer_params['input_size'] <= 0 or layer_params['output_size'] <= 0:
+            raise ValueError("Layer dimensions must be positive integers")
+
+        layers.append(layer_params)
+
+    if not layers:
+        # Fallback to default configuration if no layers found
+        layers.append({
             'input_size': 128,
             'output_size': 256,
             'activation': 'relu'
-        }
-        layers.append(layer_params)
+        })
 
     return layers
 
 def create_model_from_folder(folder_path: str) -> DynamicModel:
+    """
+    Creates a DynamicModel instance by parsing XML files in the specified folder structure.
+
+    Each subfolder represents a model section, and XML files within contain layer configurations.
+    The function recursively walks through the folder structure, processing all XML files to build
+    the model architecture.
+
+    Args:
+        folder_path (str): Path to the root folder containing XML configuration files
+
+    Returns:
+        DynamicModel: A configured neural network model based on the XML specifications
+
+    Raises:
+        FileNotFoundError: If the specified folder path doesn't exist
+        ET.ParseError: If XML parsing fails for any configuration file
+    """
     sections = defaultdict(list)
 
     if not os.path.exists(folder_path):
@@ -87,30 +271,43 @@ def create_model_from_folder(folder_path: str) -> DynamicModel:
     return DynamicModel(dict(sections))
 
 def main():
-    folder_path = 'data/'
+    """
+    Main function that demonstrates the creation and training of a dynamic PyTorch model.
+
+    This function:
+    1. Creates a dynamic model from XML configurations
+    2. Sets up distributed training environment using Accelerator
+    3. Configures optimization components (optimizer, loss function)
+    4. Creates synthetic dataset for demonstration
+    5. Implements distributed training loop with loss tracking
+
+    The model architecture is determined by XML files in the 'Xml_Data' folder,
+    where each subfolder represents a model section containing layer configurations.
+    """
+    folder_path = 'Xml_Data'
     model = create_model_from_folder(folder_path)
 
     print(f"Created dynamic PyTorch model with sections: {list(model.sections.keys())}")
 
-    # Get first section's first layer's input size dynamically
+    # Dynamically determine input size from first layer configuration
     first_section = next(iter(model.sections.keys()))
     first_layer = model.sections[first_section][0]
     input_features = first_layer[0].in_features
-
-    # Create sample input tensor matching the model's expected input size
+
+    # Validate model with sample input
    sample_input = torch.randn(1, input_features)
     output = model(sample_input)
     print(f"Sample output shape: {output.shape}")
 
-    # Initialize accelerator for distributed training
+    # Initialize distributed training components
     accelerator = Accelerator()
-
-    # Setup optimization components
+
+    # Configure training parameters and optimization components
     optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
     criterion = nn.CrossEntropyLoss()
     num_epochs = 10
 
-    # Create synthetic dataset for demonstration
+    # Generate synthetic dataset for demonstration purposes
     dataset = torch.utils.data.TensorDataset(
         torch.randn(100, input_features),
         torch.randint(0, 2, (100,))
@@ -121,14 +318,14 @@ def main():
         shuffle=True
     )
 
-    # Prepare for distributed training
+    # Prepare model, optimizer, and dataloader for distributed training
     model, optimizer, train_dataloader = accelerator.prepare(
         model,
         optimizer,
         train_dataloader
     )
 
-    # Training loop
+    # Execute training loop with distributed processing
     for epoch in range(num_epochs):
         model.train()
         total_loss = 0
@@ -139,7 +336,7 @@ def main():
             accelerator.backward(loss)
             optimizer.step()
             total_loss += loss.item()
-
+
         avg_loss = total_loss / len(train_dataloader)
         print(f"Epoch {epoch+1}/{num_epochs}, Average Loss: {avg_loss:.4f}")
 
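
For readers trying out this commit, here is a minimal usage sketch built from the DynamicModel docstring in the diff above. It assumes the committed file is importable as model.py; the section sizes are illustrative and are chosen only so the encoder output feeds the decoder input.

# Minimal usage sketch (assumption: the committed file is saved as model.py on the import path).
import torch
from model import DynamicModel

# Section layout mirrors the Example in the DynamicModel docstring; sizes are illustrative.
sections = {
    'encoder': [
        {'input_size': 128, 'output_size': 256, 'activation': 'relu', 'batch_norm': True},
        {'input_size': 256, 'output_size': 512, 'activation': 'leaky_relu', 'dropout': 0.1},
    ],
    'decoder': [
        {'input_size': 512, 'output_size': 256, 'activation': 'elu'},
        {'input_size': 256, 'output_size': 128, 'activation': 'tanh'},
    ],
}

model = DynamicModel(sections)

x = torch.randn(4, 128)                     # batch of 4, matching the first layer's input_size
full = model(x)                             # all sections in insertion order: 128 -> 512 -> 128
encoded = model(x, section_name='encoder')  # encoder only: 128 -> 512
print(full.shape, encoded.shape)            # torch.Size([4, 128]) torch.Size([4, 512])

Passing section_name runs a single named section, which is how per-section output shapes can be inspected without running the whole stack.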
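
The configuration folder used by main() is only named in the diff, not shown, so the sketch below reconstructs the kind of layout parse_xml_file and create_model_from_folder appear to expect: a root folder of section subfolders whose XML files carry <layer> elements with input_size, output_size, and activation attributes. The folder and file names ('Xml_Data', 'encoder/layers.xml') and the section-per-subfolder assumption are illustrative, not confirmed by the diff.

# Hypothetical configuration layout for create_model_from_folder (paths and names are assumptions).
import os
import torch
from model import create_model_from_folder

layer_xml = """<?xml version="1.0"?>
<config>
    <layer input_size="128" output_size="256" activation="relu"/>
    <layer input_size="256" output_size="128" activation="tanh"/>
</config>
"""

os.makedirs('Xml_Data/encoder', exist_ok=True)
with open('Xml_Data/encoder/layers.xml', 'w') as f:
    f.write(layer_xml)

model = create_model_from_folder('Xml_Data')
print(list(model.sections.keys()))          # expected to include a section for the subfolder
print(model(torch.randn(1, 128)).shape)     # 128 -> 256 -> 128 if parsing works as described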