Princess3 committed
Commit 775c6bb · verified · 1 Parent(s): bf3b766

Upload model.py

Files changed (1): model.py (+2 -130)
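This commit strips model.py's docstrings and inline comments (130 deletions) and adds two lines: a diagnostic print in DynamicModel.__init__ that logs each layer as it is created, and the same section_name assignment in create_model_from_folder rewritten without its trailing comment.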
model.py CHANGED
@@ -9,38 +9,9 @@ from accelerate import Accelerator
 
 class DynamicModel(nn.Module):
     def __init__(self, sections: Dict[str, List[Dict[str, Any]]]):
-        """
-        Initialize the DynamicModel with configurable neural network sections.
-
-        Args:
-            sections (Dict[str, List[Dict[str, Any]]]): Dictionary mapping section names to lists of layer configurations.
-                Each layer configuration is a dictionary containing:
-                - input_size (int): Size of input features
-                - output_size (int): Size of output features
-                - activation (str, optional): Activation function name ('relu', 'tanh', 'sigmoid', etc.)
-                - dropout (float, optional): Dropout rate
-                - batch_norm (bool, optional): Whether to use batch normalization
-                - hidden_layers (List[Dict[str, Any]], optional): List of hidden layer configurations
-                - memory_augmentation (bool, optional): Whether to add a memory augmentation layer
-                - hybrid_attention (bool, optional): Whether to add a hybrid attention layer
-                - dynamic_flash_attention (bool, optional): Whether to add a dynamic flash attention layer
-
-        Example:
-            sections = {
-                'encoder': [
-                    {'input_size': 128, 'output_size': 256, 'activation': 'relu', 'batch_norm': True},
-                    {'input_size': 256, 'output_size': 512, 'activation': 'leaky_relu', 'dropout': 0.1}
-                ],
-                'decoder': [
-                    {'input_size': 512, 'output_size': 256, 'activation': 'elu'},
-                    {'input_size': 256, 'output_size': 128, 'activation': 'tanh'}
-                ]
-            }
-        """
         super(DynamicModel, self).__init__()
         self.sections = nn.ModuleDict()
 
-        # Default section configuration if none provided
         if not sections:
             sections = {
                 'default': [{
@@ -52,47 +23,19 @@ class DynamicModel(nn.Module):
                 }]
             }
 
-        # Initialize each section with its layer configurations
         for section_name, layers in sections.items():
             self.sections[section_name] = nn.ModuleList()
             for layer_params in layers:
+                print(f"Creating layer in section '{section_name}' with params: {layer_params}")
                 self.sections[section_name].append(self.create_layer(layer_params))
 
     def create_layer(self, layer_params: Dict[str, Any]) -> nn.Module:
-        """
-        Creates a neural network layer based on provided parameters.
-
-        Args:
-            layer_params (Dict[str, Any]): Dictionary containing layer configuration
-                Required keys:
-                - input_size (int): Size of input features
-                - output_size (int): Size of output features
-                Optional keys:
-                - activation (str): Activation function name ('relu', 'tanh', 'sigmoid', None)
-                - dropout (float): Dropout rate if needed
-                - batch_norm (bool): Whether to use batch normalization
-                - hidden_layers (List[Dict[str, Any]]): List of hidden layer configurations
-                - memory_augmentation (bool): Whether to add a memory augmentation layer
-                - hybrid_attention (bool): Whether to add a hybrid attention layer
-                - dynamic_flash_attention (bool): Whether to add a dynamic flash attention layer
-
-        Returns:
-            nn.Module: Configured neural network layer with activation
-
-        Raises:
-            KeyError: If required parameters are missing
-            ValueError: If activation function is not supported
-        """
         layers = []
-
-        # Add linear layer
         layers.append(nn.Linear(layer_params['input_size'], layer_params['output_size']))
 
-        # Add batch normalization if specified
         if layer_params.get('batch_norm', False):
             layers.append(nn.BatchNorm1d(layer_params['output_size']))
 
-        # Add activation function
         activation = layer_params.get('activation', 'relu')
         if activation == 'relu':
             layers.append(nn.ReLU(inplace=True))
@@ -107,43 +50,25 @@ class DynamicModel(nn.Module):
         elif activation is not None:
             raise ValueError(f"Unsupported activation function: {activation}")
 
-        # Add dropout if specified
         if dropout_rate := layer_params.get('dropout', 0.0):
             layers.append(nn.Dropout(p=dropout_rate))
 
-        # Add hidden layers if specified
         if hidden_layers := layer_params.get('hidden_layers', []):
             for hidden_layer_params in hidden_layers:
                 layers.append(self.create_layer(hidden_layer_params))
 
-        # Add memory augmentation layer if specified
         if layer_params.get('memory_augmentation', False):
             layers.append(MemoryAugmentationLayer(layer_params['output_size']))
 
-        # Add hybrid attention layer if specified
         if layer_params.get('hybrid_attention', False):
             layers.append(HybridAttentionLayer(layer_params['output_size']))
 
-        # Add dynamic flash attention layer if specified
         if layer_params.get('dynamic_flash_attention', False):
             layers.append(DynamicFlashAttentionLayer(layer_params['output_size']))
 
         return nn.Sequential(*layers)
 
     def forward(self, x: torch.Tensor, section_name: Optional[str] = None) -> torch.Tensor:
-        """
-        Forward pass through the dynamic model architecture.
-
-        Args:
-            x (torch.Tensor): Input tensor to process
-            section_name (Optional[str]): Specific section to process. If None, processes all sections
-
-        Returns:
-            torch.Tensor: Processed output tensor
-
-        Raises:
-            KeyError: If specified section_name doesn't exist
-        """
         if section_name is not None:
             if section_name not in self.sections:
                 raise KeyError(f"Section '{section_name}' not found in model")
@@ -184,19 +109,6 @@ class DynamicFlashAttentionLayer(nn.Module):
         return attn_output.squeeze(1)
 
 def parse_xml_file(file_path: str) -> List[Dict[str, Any]]:
-    """
-    Parses an XML configuration file to extract layer parameters for neural network construction.
-
-    Args:
-        file_path (str): Path to the XML configuration file
-
-    Returns:
-        List[Dict[str, Any]]: List of dictionaries containing layer configurations
-
-    Raises:
-        ET.ParseError: If XML file is malformed
-        KeyError: If required attributes are missing in XML
-    """
     tree = ET.parse(file_path)
     root = tree.getroot()
 
@@ -207,18 +119,15 @@ def parse_xml_file(file_path: str) -> List[Dict[str, Any]]:
         layer_params['output_size'] = int(layer.get('output_size', 256))
         layer_params['activation'] = layer.get('activation', 'relu').lower()
 
-        # Validate activation function
         if layer_params['activation'] not in ['relu', 'tanh', 'sigmoid', 'none']:
             raise ValueError(f"Unsupported activation function: {layer_params['activation']}")
 
-        # Validate dimensions
        if layer_params['input_size'] <= 0 or layer_params['output_size'] <= 0:
             raise ValueError("Layer dimensions must be positive integers")
 
         layers.append(layer_params)
 
     if not layers:
-        # Fallback to default configuration if no layers found
         layers.append({
             'input_size': 128,
             'output_size': 256,
@@ -228,23 +137,6 @@ def parse_xml_file(file_path: str) -> List[Dict[str, Any]]:
     return layers
 
 def create_model_from_folder(folder_path: str) -> DynamicModel:
-    """
-    Creates a DynamicModel instance by parsing XML files in the specified folder structure.
-
-    Each subfolder represents a model section, and XML files within contain layer configurations.
-    The function recursively walks through the folder structure, processing all XML files to build
-    the model architecture.
-
-    Args:
-        folder_path (str): Path to the root folder containing XML configuration files
-
-    Returns:
-        DynamicModel: A configured neural network model based on the XML specifications
-
-    Raises:
-        FileNotFoundError: If the specified folder path doesn't exist
-        ET.ParseError: If XML parsing fails for any configuration file
-    """
     sections = defaultdict(list)
 
     if not os.path.exists(folder_path):
@@ -259,7 +151,7 @@ def create_model_from_folder(folder_path: str) -> DynamicModel:
                 file_path = os.path.join(root, file)
                 try:
                     layers = parse_xml_file(file_path)
-                    section_name = os.path.basename(root).replace('.', '_')  # Replace periods with underscores
+                    section_name = os.path.basename(root).replace('.', '_')
                     sections[section_name].extend(layers)
                 except Exception as e:
                     print(f"Error processing {file_path}: {str(e)}")
@@ -271,43 +163,25 @@ def create_model_from_folder(folder_path: str) -> DynamicModel:
     return DynamicModel(dict(sections))
 
 def main():
-    """
-    Main function that demonstrates the creation and training of a dynamic PyTorch model.
-
-    This function:
-    1. Creates a dynamic model from XML configurations
-    2. Sets up distributed training environment using Accelerator
-    3. Configures optimization components (optimizer, loss function)
-    4. Creates synthetic dataset for demonstration
-    5. Implements distributed training loop with loss tracking
-
-    The model architecture is determined by XML files in the 'Xml_Data' folder,
-    where each subfolder represents a model section containing layer configurations.
-    """
     folder_path = 'data'
     model = create_model_from_folder(folder_path)
 
     print(f"Created dynamic PyTorch model with sections: {list(model.sections.keys())}")
 
-    # Dynamically determine input size from first layer configuration
     first_section = next(iter(model.sections.keys()))
     first_layer = model.sections[first_section][0]
     input_features = first_layer[0].in_features
 
-    # Validate model with sample input
     sample_input = torch.randn(1, input_features)
     output = model(sample_input)
     print(f"Sample output shape: {output.shape}")
 
-    # Initialize distributed training components
     accelerator = Accelerator()
 
-    # Configure training parameters and optimization components
     optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
     criterion = nn.CrossEntropyLoss()
     num_epochs = 10
 
-    # Generate synthetic dataset for demonstration purposes
     dataset = torch.utils.data.TensorDataset(
         torch.randn(100, input_features),
         torch.randint(0, 2, (100,))
@@ -318,14 +192,12 @@ def main():
         shuffle=True
     )
 
-    # Prepare model, optimizer, and dataloader for distributed training
     model, optimizer, train_dataloader = accelerator.prepare(
         model,
         optimizer,
         train_dataloader
    )
 
-    # Execute training loop with distributed processing
     for epoch in range(num_epochs):
         model.train()
         total_loss = 0
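
Note: the removed __init__ docstring carried the model's only usage example. For reference, a minimal sketch of constructing DynamicModel directly from a sections dict, adapted from that docstring (section names and sizes are illustrative; activations are kept to 'relu' and 'tanh', which the file's own validation list confirms; assumes the committed file is importable as model.py):

import torch
from model import DynamicModel  # assumption: the committed file is saved as model.py

sections = {
    'encoder': [
        {'input_size': 128, 'output_size': 256, 'activation': 'relu', 'batch_norm': True},
        {'input_size': 256, 'output_size': 512, 'activation': 'relu', 'dropout': 0.1},
    ],
    'decoder': [
        {'input_size': 512, 'output_size': 128, 'activation': 'tanh'},
    ],
}
model = DynamicModel(sections)  # after this commit, also prints one line per layer created
out = model(torch.randn(4, 128), section_name='encoder')  # batch of 4 avoids BatchNorm1d's single-sample error in train mode
print(out.shape)  # expected: torch.Size([4, 512])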
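
Note: parse_xml_file reads input_size, output_size, and activation attributes from each layer element, and create_model_from_folder names each section after the subfolder its XML file sits in. The element tag the parser iterates over falls outside the hunks shown above, so the 'layer' and 'layers' tag names below are assumptions. A sketch of generating a config tree the loader would pick up:

import os
import xml.etree.ElementTree as ET
from model import create_model_from_folder  # assumption: the committed file is saved as model.py

# Hypothetical layout: data/encoder/layers.xml becomes section 'encoder'
os.makedirs('data/encoder', exist_ok=True)
root = ET.Element('layers')  # root tag name assumed
ET.SubElement(root, 'layer', input_size='128', output_size='256', activation='relu')  # 'layer' tag assumed
ET.SubElement(root, 'layer', input_size='256', output_size='128', activation='tanh')
ET.ElementTree(root).write('data/encoder/layers.xml')

model = create_model_from_folder('data')
print(list(model.sections.keys()))  # expected: ['encoder']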