Princess3 committed on
Commit
d490ea9
·
verified ·
1 Parent(s): d5730cd

Create x.py

Browse files
Files changed (1) hide show
  1. x.py +224 -0
x.py ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import glob
3
+ import stat
4
+ import xml.etree.ElementTree as ET
5
+ import torch
6
+ import torch.nn as nn
7
+ import torch.nn.functional as F
8
+ from collections import defaultdict
9
+ from typing import List, Dict, Any, Optional
10
+ from colorama import Fore, Style, init
11
+ from accelerate import Accelerator
12
+ from torch.utils.data import DataLoader, TensorDataset
13
+ from torch.cuda.amp import GradScaler, autocast
14
+
15
+ # Initialize colorama
16
+ init(autoreset=True)
17
+
18
# Folder passed to create_model_from_folder() in main(), and the output folder
file_path = 'data/'
output_path = 'output/'

# Create the output folder if it doesn't exist. Attempting the creation and
# tolerating FileExistsError avoids the race between a separate existence
# check and the makedirs call.
try:
    os.makedirs(output_path)
except FileExistsError:
    pass
else:
    # Only a folder created here is opened up: read/write/execute for user,
    # group and others (mode 0o777).
    os.chmod(output_path, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)
26
+
27
+ # Ensure the given file exists; the mode applied is read/write/execute for user, group and others (0o777)
28
def ensure_file(file_path):
    """Create an empty file at ``file_path`` if nothing exists there yet.

    A file created by this call is made readable, writable and executable
    by user, group and others (mode 0o777). An existing path is left
    untouched, including its contents and permissions.

    Args:
        file_path: Path of the file to create.

    Raises:
        OSError: If the file cannot be created (for example because the
            parent directory is missing) or its mode cannot be changed.
    """
    try:
        # Mode 'x' creates the file atomically and fails when the path already
        # exists, so there is no gap between an existence check and creation.
        with open(file_path, 'x'):
            pass
    except FileExistsError:
        return
    os.chmod(file_path, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)
33
+
34
+ # Define a simple memory augmentation module
35
class MemoryAugmentationLayer(nn.Module):
    """Add a learnable vector to the input.

    The vector is stored in the ``memory`` parameter, has ``size`` elements
    and is initialised from a standard normal distribution.
    """

    def __init__(self, size: int):
        super().__init__()
        initial_memory = torch.randn(size)
        self.memory = nn.Parameter(initial_memory)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``x`` plus the memory vector (standard broadcasting applies)."""
        return torch.add(x, self.memory)
42
+
43
class HybridAttentionLayer(nn.Module):
    """Self-attention over a flat feature vector, treated as a length-1 sequence.

    Wraps ``nn.MultiheadAttention`` with 8 heads, so ``size`` must be
    divisible by 8 (the attention module rejects other sizes).
    """

    def __init__(self, size: int):
        super().__init__()
        self.attention = nn.MultiheadAttention(size, num_heads=8)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply self-attention to ``x`` and return a tensor of the same shape.

        ``nn.MultiheadAttention`` is built with its default
        ``batch_first=False``, i.e. it expects ``(seq, batch, embed)``. The
        sequence axis is therefore inserted at dim 0. Inserting it at dim 1
        would turn the batch into the sequence and let samples of one batch
        attend to each other.
        """
        x = x.unsqueeze(0)  # Add sequence dimension (length 1)
        attn_output, _ = self.attention(x, x, x)
        return attn_output.squeeze(0)
52
+
53
class DynamicFlashAttentionLayer(nn.Module):
    """Self-attention over a flat feature vector, treated as a length-1 sequence.

    Wraps ``nn.MultiheadAttention`` with 8 heads, so ``size`` must be
    divisible by 8 (the attention module rejects other sizes).
    """

    def __init__(self, size: int):
        super().__init__()
        self.attention = nn.MultiheadAttention(size, num_heads=8)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply self-attention to ``x`` and return a tensor of the same shape.

        ``nn.MultiheadAttention`` is built with its default
        ``batch_first=False``, i.e. it expects ``(seq, batch, embed)``. The
        sequence axis is therefore inserted at dim 0. Inserting it at dim 1
        would turn the batch into the sequence and let samples of one batch
        attend to each other.
        """
        x = x.unsqueeze(0)  # Add sequence dimension (length 1)
        attn_output, _ = self.attention(x, x, x)
        return attn_output.squeeze(0)
62
+
63
class DynamicModel(nn.Module):
    """Model assembled from named sections of layer descriptions.

    ``sections`` maps a section name to a list of layer parameter dicts.
    Each dict is turned into one ``nn.Sequential`` by :meth:`create_layer`
    and stored in ``self.sections[section_name]`` (an ``nn.ModuleList``
    inside an ``nn.ModuleDict``). When ``sections`` is empty, a single
    ``'default'`` section with one 128 -> 256 layer is used instead.
    """

    def __init__(self, sections: Dict[str, List[Dict[str, Any]]]):
        super().__init__()
        self.sections = nn.ModuleDict()
        if not sections:
            sections = {
                'default': [{
                    'input_size': 128,
                    'output_size': 256,
                    'activation': 'relu',
                    'batch_norm': True,
                    'dropout': 0.1
                }]
            }
        for section_name, layers in sections.items():
            self.sections[section_name] = nn.ModuleList()
            for layer_params in layers:
                print(f"Creating layer in section '{section_name}' with params: {layer_params}")
                self.sections[section_name].append(self.create_layer(layer_params))

    def create_layer(self, layer_params: Dict[str, Any]) -> nn.Module:
        """Build one ``nn.Sequential`` from a layer parameter dict.

        Recognised keys:
            input_size, output_size: required sizes of the linear layer.
            batch_norm: add ``BatchNorm1d`` when truthy (default False).
            activation: 'relu' (default), 'tanh', 'sigmoid', 'leaky_relu',
                'elu', or ``None`` / 'none' for no activation.
            dropout: dropout probability; falsy values add no dropout.
            hidden_layers: list of nested parameter dicts, each built
                recursively and appended in order.
            memory_augmentation, hybrid_attention, dynamic_flash_attention:
                append the corresponding layer unless set to a falsy value
                (all default to True).

        Raises:
            KeyError: If 'input_size' or 'output_size' is missing.
            ValueError: If the activation name is not supported.
        """
        output_size = layer_params['output_size']
        layers = [nn.Linear(layer_params['input_size'], output_size)]
        if layer_params.get('batch_norm', False):
            layers.append(nn.BatchNorm1d(output_size))

        activation = layer_params.get('activation', 'relu')
        if activation == 'none':
            # parse_xml_file accepts the literal string 'none'; treat it the
            # same as None instead of rejecting it as unsupported.
            activation = None
        if activation == 'relu':
            layers.append(nn.ReLU(inplace=True))
        elif activation == 'tanh':
            layers.append(nn.Tanh())
        elif activation == 'sigmoid':
            layers.append(nn.Sigmoid())
        elif activation == 'leaky_relu':
            layers.append(nn.LeakyReLU(negative_slope=0.01, inplace=True))
        elif activation == 'elu':
            layers.append(nn.ELU(alpha=1.0, inplace=True))
        elif activation is not None:
            raise ValueError(f"Unsupported activation function: {activation}")

        if dropout_rate := layer_params.get('dropout', 0.0):
            layers.append(nn.Dropout(p=dropout_rate))
        for hidden_layer_params in layer_params.get('hidden_layers', []) or []:
            layers.append(self.create_layer(hidden_layer_params))

        # The three optional layers are sized with this layer's output_size.
        if layer_params.get('memory_augmentation', True):
            layers.append(MemoryAugmentationLayer(output_size))
        if layer_params.get('hybrid_attention', True):
            layers.append(HybridAttentionLayer(output_size))
        if layer_params.get('dynamic_flash_attention', True):
            layers.append(DynamicFlashAttentionLayer(output_size))
        return nn.Sequential(*layers)

    def forward(self, x: torch.Tensor, section_name: Optional[str] = None) -> torch.Tensor:
        """Run ``x`` through one section, or through every section in order.

        Args:
            x: Input tensor.
            section_name: Section to apply. When ``None``, all sections are
                applied one after another in insertion order, each receiving
                the previous section's output.

        Raises:
            KeyError: If ``section_name`` is given but not part of the model.
        """
        if section_name is not None:
            if section_name not in self.sections:
                raise KeyError(f"Section '{section_name}' not found in model")
            for layer in self.sections[section_name]:
                x = layer(x)
        else:
            for section_layers in self.sections.values():
                for layer in section_layers:
                    x = layer(x)
        return x
125
+
126
def parse_xml_file(file_path: str) -> List[Dict[str, Any]]:
    """Read layer descriptions from the ``<layer>`` elements of an XML file.

    Every ``<layer>`` element at any depth contributes one dict with the
    keys 'input_size' (default 128), 'output_size' (default 256) and
    'activation' (default 'relu', lower-cased). The activation 'none' is
    returned as ``None`` so that the model builder adds no activation.
    A file without any ``<layer>`` element yields a single default layer.

    Args:
        file_path: Path of the XML file to parse.

    Returns:
        A non-empty list of layer parameter dicts.

    Raises:
        ValueError: If an activation is not supported, a size is not an
            integer, or a size is not positive.
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
        OSError: If the file cannot be read.
    """
    # Kept in sync with the activations DynamicModel.create_layer can build.
    supported_activations = ('relu', 'tanh', 'sigmoid', 'leaky_relu', 'elu', 'none')

    root = ET.parse(file_path).getroot()
    layers = []
    for layer in root.findall('.//layer'):
        activation = layer.get('activation', 'relu').lower()
        if activation not in supported_activations:
            raise ValueError(f"Unsupported activation function: {activation}")
        input_size = int(layer.get('input_size', 128))
        output_size = int(layer.get('output_size', 256))
        if input_size <= 0 or output_size <= 0:
            raise ValueError("Layer dimensions must be positive integers")
        layers.append({
            'input_size': input_size,
            'output_size': output_size,
            # 'none' means "no activation"; pass None, which the model
            # builder understands, rather than the raw string.
            'activation': None if activation == 'none' else activation,
        })
    if not layers:
        layers.append({
            'input_size': 128,
            'output_size': 256,
            'activation': 'relu'
        })
    return layers
147
+
148
def create_model_from_folder(folder_path: str) -> DynamicModel:
    """Build a DynamicModel from all ``.xml`` files below ``folder_path``.

    The folder is walked recursively. Each XML file is parsed with
    ``parse_xml_file`` and its layers are appended to the section named
    after the directory containing the file (dots replaced by underscores).
    Files that fail to parse are reported and skipped. When the folder is
    missing or holds no XML files, a model with the default configuration
    is returned.

    Args:
        folder_path: Root folder to scan.

    Returns:
        The assembled model.
    """
    sections = defaultdict(list)
    if not os.path.exists(folder_path):
        print(f"Warning: Folder {folder_path} does not exist. Creating model with default configuration.")
        return DynamicModel({})
    xml_files_found = False
    for root, dirs, files in os.walk(folder_path):
        # Sort in place so traversal order, and with it the layer order inside
        # each section, does not depend on the file system's listing order.
        dirs.sort()
        # normpath strips a trailing separator: basename('data/') would be ''
        # and an empty module name is rejected by nn.ModuleDict.
        section_name = os.path.basename(os.path.normpath(root)).replace('.', '_') or 'default'
        for file in sorted(files):
            if not file.endswith('.xml'):
                continue
            xml_files_found = True
            xml_path = os.path.join(root, file)
            try:
                layers = parse_xml_file(xml_path)
            except Exception as e:
                # Best effort: one unreadable file must not abort the scan.
                print(f"Error processing {xml_path}: {str(e)}")
            else:
                sections[section_name].extend(layers)
    if not xml_files_found:
        print("Warning: No XML files found. Creating model with default configuration.")
        return DynamicModel({})
    return DynamicModel(dict(sections))
169
+
170
def main():
    """Build the model from ``file_path``, run a sample input, then train.

    Training uses randomly generated inputs and binary labels for 10
    epochs and prints the average loss of each epoch.
    """
    print(Fore.CYAN + "Starting conversion...")

    # Create the dynamic model from the folder
    model = create_model_from_folder(file_path)
    print(f"Created dynamic PyTorch model with sections: {list(model.sections.keys())}")

    # Print the model architecture
    print(model)

    # Ensure the input tensor size matches the expected input size
    first_section = next(iter(model.sections.keys()))
    first_layer = model.sections[first_section][0]
    input_features = first_layer[0].in_features
    sample_input = torch.randn(1, input_features)
    # The sample has batch size 1. In training mode BatchNorm1d cannot compute
    # batch statistics from a single sample and raises, which happens with the
    # default configuration (batch_norm=True). Run the check in eval mode and
    # without gradient tracking; model.train() is called again before training.
    model.eval()
    with torch.no_grad():
        output = model(sample_input)
    print(f"Sample output shape: {output.shape}")

    # Training setup
    accelerator = Accelerator()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()
    num_epochs = 10
    dataset = TensorDataset(
        torch.randn(100, input_features),
        torch.randint(0, 2, (100,))
    )
    train_dataloader = DataLoader(
        dataset,
        batch_size=8,  # Reduced batch size
        shuffle=True
    )
    model, optimizer, train_dataloader = accelerator.prepare(
        model, optimizer, train_dataloader
    )
    # NOTE(review): mixed precision is driven by torch.cuda.amp here rather
    # than by the Accelerator - confirm this combination is intended.
    scaler = GradScaler()  # Mixed precision training

    # Training loop
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        for batch_idx, (inputs, labels) in enumerate(train_dataloader):
            optimizer.zero_grad()
            with autocast():  # Mixed precision training
                outputs = model(inputs)
                loss = criterion(outputs, labels)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            total_loss += loss.item()
        avg_loss = total_loss / len(train_dataloader)
        print(f"Epoch {epoch+1}/{num_epochs}, Average Loss: {avg_loss:.4f}")


if __name__ == "__main__":
    main()