Princess3 committed
Commit 3af37cb · verified · 1 Parent(s): f95d6bf

Upload 2 files

Files changed (2)
  1. model.py +5 -5
  2. model2.py +265 -0
model.py CHANGED
@@ -57,13 +57,13 @@ class DynamicModel(nn.Module):
             for hidden_layer_params in hidden_layers:
                 layers.append(self.create_layer(hidden_layer_params))
 
-        if layer_params.get('memory_augmentation', False):
+        if layer_params.get('memory_augmentation', True):
             layers.append(MemoryAugmentationLayer(layer_params['output_size']))
 
-        if layer_params.get('hybrid_attention', False):
+        if layer_params.get('hybrid_attention', True):
             layers.append(HybridAttentionLayer(layer_params['output_size']))
 
-        if layer_params.get('dynamic_flash_attention', False):
+        if layer_params.get('dynamic_flash_attention', True):
             layers.append(DynamicFlashAttentionLayer(layer_params['output_size']))
 
         return nn.Sequential(*layers)
@@ -143,7 +143,7 @@ def create_model_from_folder(folder_path: str) -> DynamicModel:
         print(f"Warning: Folder {folder_path} does not exist. Creating model with default configuration.")
         return DynamicModel({})
 
-    xml_files_found = True
+    xml_files_found = False
     for root, dirs, files in os.walk(folder_path):
         for file in files:
             if file.endswith('.xml'):
@@ -217,4 +217,4 @@ def main():
         print(f"Epoch {epoch+1}/{num_epochs}, Average Loss: {avg_loss:.4f}")
 
 if __name__ == "__main__":
-    main()
+    main()
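With this change the memory_augmentation, hybrid_attention and dynamic_flash_attention keys default to True, so a layer config that omits them now gets all three extra layers appended and has to set them to False explicitly to opt out; the second hunk also initializes xml_files_found to False so the "no XML files" warning can actually fire. A minimal sketch of the new default behaviour (assumes model.py from this commit is importable from the working directory and that its DynamicModel takes the same sections dict as the model2.py listing below; the config values are illustrative):

from model import DynamicModel

# Omitting the three flags now yields Linear -> ReLU plus the memory-augmentation
# and attention layers for this layer config.
implicit_on = {'default': [{'input_size': 128, 'output_size': 256, 'activation': 'relu'}]}

# Opting out now requires setting the flags to False explicitly.
explicit_off = {'default': [{'input_size': 128, 'output_size': 256, 'activation': 'relu',
                             'memory_augmentation': False, 'hybrid_attention': False,
                             'dynamic_flash_attention': False}]}

print(DynamicModel(implicit_on).sections['default'][0])   # includes the extra layers
print(DynamicModel(explicit_off).sections['default'][0])  # plain Linear + ReLU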
model2.py ADDED
@@ -0,0 +1,265 @@
+import os
+import xml.etree.ElementTree as ET
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from typing import List, Dict, Any, Optional
+from collections import defaultdict
+from accelerate import Accelerator
+from transformers import AutoTokenizer, AutoModel
+import faiss
+import numpy as np
+
+class DynamicModel(nn.Module):
+    def __init__(self, sections: Dict[str, List[Dict[str, Any]]]):
+        super(DynamicModel, self).__init__()
+        self.sections = nn.ModuleDict()
+
+        if not sections:
+            sections = {
+                'default': [{
+                    'input_size': 128,
+                    'output_size': 256,
+                    'activation': 'relu',
+                    'batch_norm': True,
+                    'dropout': 0.1
+                }]
+            }
+
+        for section_name, layers in sections.items():
+            self.sections[section_name] = nn.ModuleList()
+            for layer_params in layers:
+                print(f"Creating layer in section '{section_name}' with params: {layer_params}")
+                self.sections[section_name].append(self.create_layer(layer_params))
+
+    def create_layer(self, layer_params: Dict[str, Any]) -> nn.Module:
+        layers = []
+        layers.append(nn.Linear(layer_params['input_size'], layer_params['output_size']))
+
+        if layer_params.get('batch_norm', False):
+            layers.append(nn.BatchNorm1d(layer_params['output_size']))
+
+        activation = layer_params.get('activation', 'relu')
+        if activation == 'relu':
+            layers.append(nn.ReLU(inplace=True))
+        elif activation == 'tanh':
+            layers.append(nn.Tanh())
+        elif activation == 'sigmoid':
+            layers.append(nn.Sigmoid())
+        elif activation == 'leaky_relu':
+            layers.append(nn.LeakyReLU(negative_slope=0.01, inplace=True))
+        elif activation == 'elu':
+            layers.append(nn.ELU(alpha=1.0, inplace=True))
+        elif activation is not None:
+            raise ValueError(f"Unsupported activation function: {activation}")
+
+        if dropout_rate := layer_params.get('dropout', 0.0):
+            layers.append(nn.Dropout(p=dropout_rate))
+
+        if hidden_layers := layer_params.get('hidden_layers', []):
+            for hidden_layer_params in hidden_layers:
+                layers.append(self.create_layer(hidden_layer_params))
+
+        if layer_params.get('memory_augmentation', False):
+            layers.append(MemoryAugmentationLayer(layer_params['output_size']))
+
+        if layer_params.get('hybrid_attention', False):
+            layers.append(HybridAttentionLayer(layer_params['output_size']))
+
+        if layer_params.get('dynamic_flash_attention', False):
+            layers.append(DynamicFlashAttentionLayer(layer_params['output_size']))
+
+        return nn.Sequential(*layers)
+
+    def forward(self, x: torch.Tensor, section_name: Optional[str] = None) -> torch.Tensor:
+        if section_name is not None:
+            if section_name not in self.sections:
+                raise KeyError(f"Section '{section_name}' not found in model")
+            for layer in self.sections[section_name]:
+                x = layer(x)
+        else:
+            for section_name, layers in self.sections.items():
+                for layer in layers:
+                    x = layer(x)
+        return x
+
+class MemoryAugmentationLayer(nn.Module):
+    def __init__(self, size: int):
+        super(MemoryAugmentationLayer, self).__init__()
+        self.memory = nn.Parameter(torch.randn(size))
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return x + self.memory
+
+class HybridAttentionLayer(nn.Module):
+    def __init__(self, size: int):
+        super(HybridAttentionLayer, self).__init__()
+        self.attention = nn.MultiheadAttention(size, num_heads=8)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = x.unsqueeze(1)  # Add sequence dimension
+        attn_output, _ = self.attention(x, x, x)
+        return attn_output.squeeze(1)
+
+class DynamicFlashAttentionLayer(nn.Module):
+    def __init__(self, size: int):
+        super(DynamicFlashAttentionLayer, self).__init__()
+        self.attention = nn.MultiheadAttention(size, num_heads=8)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = x.unsqueeze(1)  # Add sequence dimension
+        attn_output, _ = self.attention(x, x, x)
+        return attn_output.squeeze(1)
+
+def parse_xml_file(file_path: str) -> List[Dict[str, Any]]:
+    tree = ET.parse(file_path)
+    root = tree.getroot()
+
+    layers = []
+    for layer in root.findall('.//layer'):
+        layer_params = {}
+        layer_params['input_size'] = int(layer.get('input_size', 128))
+        layer_params['output_size'] = int(layer.get('output_size', 256))
+        layer_params['activation'] = layer.get('activation', 'relu').lower()
+
+        if layer_params['activation'] not in ['relu', 'tanh', 'sigmoid', 'none']:
+            raise ValueError(f"Unsupported activation function: {layer_params['activation']}")
+
+        if layer_params['input_size'] <= 0 or layer_params['output_size'] <= 0:
+            raise ValueError("Layer dimensions must be positive integers")
+
+        layers.append(layer_params)
+
+    if not layers:
+        layers.append({
+            'input_size': 128,
+            'output_size': 256,
+            'activation': 'relu'
+        })
+
+    return layers
+
+def create_model_from_folder(folder_path: str) -> DynamicModel:
+    sections = defaultdict(list)
+
+    if not os.path.exists(folder_path):
+        print(f"Warning: Folder {folder_path} does not exist. Creating model with default configuration.")
+        return DynamicModel({})
+
+    xml_files_found = False
+    for root, dirs, files in os.walk(folder_path):
+        for file in files:
+            if file.endswith('.xml'):
+                xml_files_found = True
+                file_path = os.path.join(root, file)
+                try:
+                    layers = parse_xml_file(file_path)
+                    section_name = os.path.basename(root).replace('.', '_')
+                    sections[section_name].extend(layers)
+                except Exception as e:
+                    print(f"Error processing {file_path}: {str(e)}")
+
+    if not xml_files_found:
+        print("Warning: No XML files found. Creating model with default configuration.")
+        return DynamicModel({})
+
+    return DynamicModel(dict(sections))
+
+def create_embeddings_and_stores(folder_path: str, model_name: str = "sentence-transformers/all-MiniLM-L6-v2"):
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModel.from_pretrained(model_name)
+
+    vector_store = faiss.IndexFlatL2(384)  # Assuming 384-dimensional embeddings
+    doc_store = []
+
+    for root, dirs, files in os.walk(folder_path):
+        for file in files:
+            if file.endswith('.xml'):
+                file_path = os.path.join(root, file)
+                try:
+                    tree = ET.parse(file_path)
+                    root = tree.getroot()
+                    for elem in root.iter():
+                        if elem.text:
+                            text = elem.text.strip()
+                            inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
+                            with torch.no_grad():
+                                embeddings = model(**inputs).last_hidden_state.mean(dim=1).numpy()
+                            vector_store.add(embeddings)
+                            doc_store.append(text)
+                except Exception as e:
+                    print(f"Error processing {file_path}: {str(e)}")
+
+    return vector_store, doc_store
+
+def query_vector_store(query: str, vector_store, doc_store, model_name: str = "sentence-transformers/all-MiniLM-L6-v2"):
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModel.from_pretrained(model_name)
+
+    inputs = tokenizer(query, return_tensors="pt", truncation=True, padding=True)
+    with torch.no_grad():
+        query_embedding = model(**inputs).last_hidden_state.mean(dim=1).numpy()
+
+    D, I = vector_store.search(query_embedding, k=5)  # Retrieve top 5 documents
+    results = [doc_store[i] for i in I[0]]
+    return results
+
+def main():
+    folder_path = 'data'
+    model = create_model_from_folder(folder_path)
+
+    print(f"Created dynamic PyTorch model with sections: {list(model.sections.keys())}")
+
+    first_section = next(iter(model.sections.keys()))
+    first_layer = model.sections[first_section][0]
+    input_features = first_layer[0].in_features
+
+    sample_input = torch.randn(1, input_features)
+    output = model(sample_input)
+    print(f"Sample output shape: {output.shape}")
+
+    vector_store, doc_store = create_embeddings_and_stores(folder_path)
+
+    accelerator = Accelerator()
+
+    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
+    criterion = nn.CrossEntropyLoss()
+    num_epochs = 10
+
+    dataset = torch.utils.data.TensorDataset(
+        torch.randn(100, input_features),
+        torch.randint(0, 2, (100,))
+    )
+    train_dataloader = torch.utils.data.DataLoader(
+        dataset,
+        batch_size=16,
+        shuffle=True
+    )
+
+    model, optimizer, train_dataloader = accelerator.prepare(
+        model,
+        optimizer,
+        train_dataloader
+    )
+
+    for epoch in range(num_epochs):
+        model.train()
+        total_loss = 0
+        for batch_idx, (inputs, labels) in enumerate(train_dataloader):
+            optimizer.zero_grad()
+            outputs = model(inputs)
+            loss = criterion(outputs, labels)
+            accelerator.backward(loss)
+            optimizer.step()
+            total_loss += loss.item()
+
+        avg_loss = total_loss / len(train_dataloader)
+        print(f"Epoch {epoch+1}/{num_epochs}, Average Loss: {avg_loss:.4f}")
+
+    # Query the vector store after training
+    user_query = "example query text"
+    results = query_vector_store(user_query, vector_store, doc_store)
+    print(f"Query results: {results}")
+
+if __name__ == "__main__":
+    main()
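For reference, parse_xml_file reads the input_size, output_size and activation attributes of every <layer> element it finds, and create_model_from_folder names each section after the folder containing the XML file. A minimal sketch of a config that this file would accept; the 'data/demo' folder and 'layers.xml' file name are illustrative only, not part of the commit:

import os
from model2 import create_model_from_folder

# Write a hypothetical config under the 'data' folder that main() uses.
os.makedirs('data/demo', exist_ok=True)
with open('data/demo/layers.xml', 'w') as f:
    f.write('<model>\n'
            '  <layer input_size="128" output_size="256" activation="relu"/>\n'
            '  <layer input_size="256" output_size="64" activation="tanh"/>\n'
            '</model>\n')

model = create_model_from_folder('data')
print(list(model.sections.keys()))  # expect a 'demo' section built from the two <layer> entries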