Spaces:

Princess3
/

python

Runtime error

App Files Files Community

Princess3 committed on Oct 28, 2024

Commit

9a38883

verified ·

1 Parent(s): cd02111

Upload model.py

Browse files

Files changed (1) hide show

model.py +223 -26

model.py CHANGED Viewed

@@ -9,58 +9,242 @@ from accelerate import Accelerator
 class DynamicModel(nn.Module):
     def __init__(self, sections: Dict[str, List[Dict[str, Any]]]):
         super(DynamicModel, self).__init__()
         self.sections = nn.ModuleDict()
-        # Default section if none provided
         if not sections:
             sections = {
                 'default': [{
                     'input_size': 128,
                     'output_size': 256,
-                    'activation': 'relu'
                 }]
             }
         for section_name, layers in sections.items():
             self.sections[section_name] = nn.ModuleList()
             for layer_params in layers:
                 self.sections[section_name].append(self.create_layer(layer_params))
     def create_layer(self, layer_params: Dict[str, Any]) -> nn.Module:
-        layer = nn.Linear(layer_params['input_size'], layer_params['output_size'])
         activation = layer_params.get('activation', 'relu')
         if activation == 'relu':
-            return nn.Sequential(layer, nn.ReLU())
         elif activation == 'tanh':
-            return nn.Sequential(layer, nn.Tanh())
         elif activation == 'sigmoid':
-            return nn.Sequential(layer, nn.Sigmoid())
-        else:
-            return layer
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        for section_name, layers in self.sections.items():
-            for layer in layers:
                 x = layer(x)
         return x
 def parse_xml_file(file_path: str) -> List[Dict[str, Any]]:
     tree = ET.parse(file_path)
     root = tree.getroot()
     layers = []
-    for prov in root.findall('.//prov'):
-        layer_params = {
             'input_size': 128,
             'output_size': 256,
             'activation': 'relu'
-        }
-        layers.append(layer_params)
     return layers
 def create_model_from_folder(folder_path: str) -> DynamicModel:
     sections = defaultdict(list)
     if not os.path.exists(folder_path):
@@ -87,30 +271,43 @@ def create_model_from_folder(folder_path: str) -> DynamicModel:
     return DynamicModel(dict(sections))
 def main():
-    folder_path = 'data/'
     model = create_model_from_folder(folder_path)
     print(f"Created dynamic PyTorch model with sections: {list(model.sections.keys())}")
-    # Get first section's first layer's input size dynamically
     first_section = next(iter(model.sections.keys()))
     first_layer = model.sections[first_section][0]
     input_features = first_layer[0].in_features
-    # Create sample input tensor matching the model's expected input size
     sample_input = torch.randn(1, input_features)
     output = model(sample_input)
     print(f"Sample output shape: {output.shape}")
-    # Initialize accelerator for distributed training
     accelerator = Accelerator()
-    # Setup optimization components
     optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
     criterion = nn.CrossEntropyLoss()
     num_epochs = 10
-    # Create synthetic dataset for demonstration
     dataset = torch.utils.data.TensorDataset(
         torch.randn(100, input_features),
         torch.randint(0, 2, (100,))
@@ -121,14 +318,14 @@ def main():
         shuffle=True
     )
-    # Prepare for distributed training
     model, optimizer, train_dataloader = accelerator.prepare(
         model,
         optimizer,
         train_dataloader
     )
-    # Training loop
     for epoch in range(num_epochs):
         model.train()
         total_loss = 0
@@ -139,7 +336,7 @@ def main():
             accelerator.backward(loss)
             optimizer.step()
             total_loss += loss.item()
         avg_loss = total_loss / len(train_dataloader)
         print(f"Epoch {epoch+1}/{num_epochs}, Average Loss: {avg_loss:.4f}")

 class DynamicModel(nn.Module):
     def __init__(self, sections: Dict[str, List[Dict[str, Any]]]):
+        """
+        Initialize the DynamicModel with configurable neural network sections.
+        Args:
+            sections (Dict[str, List[Dict[str, Any]]]): Dictionary mapping section names to lists of layer configurations.
+                Each layer configuration is a dictionary containing:
+                - input_size (int): Size of input features
+                - output_size (int): Size of output features
+                - activation (str, optional): Activation function name ('relu', 'tanh', 'sigmoid', etc.)
+                - dropout (float, optional): Dropout rate
+                - batch_norm (bool, optional): Whether to use batch normalization
+                - hidden_layers (List[Dict[str, Any]], optional): List of hidden layer configurations
+                - memory_augmentation (bool, optional): Whether to add a memory augmentation layer
+                - hybrid_attention (bool, optional): Whether to add a hybrid attention layer
+                - dynamic_flash_attention (bool, optional): Whether to add a dynamic flash attention layer
+        Example:
+            sections = {
+                'encoder': [
+                    {'input_size': 128, 'output_size': 256, 'activation': 'relu', 'batch_norm': True},
+                    {'input_size': 256, 'output_size': 512, 'activation': 'leaky_relu', 'dropout': 0.1}
+                ],
+                'decoder': [
+                    {'input_size': 512, 'output_size': 256, 'activation': 'elu'},
+                    {'input_size': 256, 'output_size': 128, 'activation': 'tanh'}
+                ]
+            }
+        """
         super(DynamicModel, self).__init__()
         self.sections = nn.ModuleDict()
+        # Default section configuration if none provided
         if not sections:
             sections = {
                 'default': [{
                     'input_size': 128,
                     'output_size': 256,
+                    'activation': 'relu',
+                    'batch_norm': True,
+                    'dropout': 0.1
                 }]
             }
+        # Initialize each section with its layer configurations
         for section_name, layers in sections.items():
             self.sections[section_name] = nn.ModuleList()
             for layer_params in layers:
                 self.sections[section_name].append(self.create_layer(layer_params))
     def create_layer(self, layer_params: Dict[str, Any]) -> nn.Module:
+        """
+        Creates a neural network layer based on provided parameters.
+        Args:
+            layer_params (Dict[str, Any]): Dictionary containing layer configuration
+                Required keys:
+                - input_size (int): Size of input features
+                - output_size (int): Size of output features
+                Optional keys:
+                - activation (str): Activation function name ('relu', 'tanh', 'sigmoid', None)
+                - dropout (float): Dropout rate if needed
+                - batch_norm (bool): Whether to use batch normalization
+                - hidden_layers (List[Dict[str, Any]]): List of hidden layer configurations
+                - memory_augmentation (bool): Whether to add a memory augmentation layer
+                - hybrid_attention (bool): Whether to add a hybrid attention layer
+                - dynamic_flash_attention (bool): Whether to add a dynamic flash attention layer
+        Returns:
+            nn.Module: Configured neural network layer with activation
+        Raises:
+            KeyError: If required parameters are missing
+            ValueError: If activation function is not supported
+        """
+        layers = []
+        # Add linear layer
+        layers.append(nn.Linear(layer_params['input_size'], layer_params['output_size']))
+        # Add batch normalization if specified
+        if layer_params.get('batch_norm', False):
+            layers.append(nn.BatchNorm1d(layer_params['output_size']))
+        # Add activation function
         activation = layer_params.get('activation', 'relu')
         if activation == 'relu':
+            layers.append(nn.ReLU(inplace=True))
         elif activation == 'tanh':
+            layers.append(nn.Tanh())
         elif activation == 'sigmoid':
+            layers.append(nn.Sigmoid())
+        elif activation == 'leaky_relu':
+            layers.append(nn.LeakyReLU(negative_slope=0.01, inplace=True))
+        elif activation == 'elu':
+            layers.append(nn.ELU(alpha=1.0, inplace=True))
+        elif activation is not None:
+            raise ValueError(f"Unsupported activation function: {activation}")
+        # Add dropout if specified
+        if dropout_rate := layer_params.get('dropout', 0.0):
+            layers.append(nn.Dropout(p=dropout_rate))
+        # Add hidden layers if specified
+        if hidden_layers := layer_params.get('hidden_layers', []):
+            for hidden_layer_params in hidden_layers:
+                layers.append(self.create_layer(hidden_layer_params))
+        # Add memory augmentation layer if specified
+        if layer_params.get('memory_augmentation', False):
+            layers.append(MemoryAugmentationLayer(layer_params['output_size']))
+        # Add hybrid attention layer if specified
+        if layer_params.get('hybrid_attention', False):
+            layers.append(HybridAttentionLayer(layer_params['output_size']))
+        # Add dynamic flash attention layer if specified
+        if layer_params.get('dynamic_flash_attention', False):
+            layers.append(DynamicFlashAttentionLayer(layer_params['output_size']))
+        return nn.Sequential(*layers)
+    def forward(self, x: torch.Tensor, section_name: Optional[str] = None) -> torch.Tensor:
+        """
+        Forward pass through the dynamic model architecture.
+        Args:
+            x (torch.Tensor): Input tensor to process
+            section_name (Optional[str]): Specific section to process. If None, processes all sections
+        Returns:
+            torch.Tensor: Processed output tensor
+        Raises:
+            KeyError: If specified section_name doesn't exist
+        """
+        if section_name is not None:
+            if section_name not in self.sections:
+                raise KeyError(f"Section '{section_name}' not found in model")
+            for layer in self.sections[section_name]:
                 x = layer(x)
+        else:
+            for section_name, layers in self.sections.items():
+                for layer in layers:
+                    x = layer(x)
         return x
class MemoryAugmentationLayer(nn.Module):
    """Adds a learnable vector to its input.

    ``memory`` is an ``nn.Parameter`` of length ``size`` drawn from a standard
    normal distribution; it is added to ``x`` using normal broadcasting rules.
    """

    def __init__(self, size: int):
        super().__init__()
        initial_memory = torch.randn(size)
        self.memory = nn.Parameter(initial_memory)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``x`` shifted by the learned ``memory`` vector."""
        return torch.add(x, self.memory)
class HybridAttentionLayer(nn.Module):
    """Self-attention block applied independently to each row of a 2-D input.

    Wraps ``nn.MultiheadAttention`` (default ``batch_first=False`` layout, i.e.
    ``(seq, batch, embed)``) for inputs shaped ``(batch, size)``.

    Args:
        size (int): Feature width; must be divisible by ``num_heads``.
        num_heads (int): Number of attention heads (default 8, as before).
    """

    def __init__(self, size: int, num_heads: int = 8):
        super().__init__()
        self.attention = nn.MultiheadAttention(size, num_heads=num_heads)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Attend over a length-1 sequence per sample and return ``(batch, size)``."""
        # The sequence axis is dim 0 in the (seq, batch, embed) layout. Inserting
        # it at dim 1 would make the batch act as the sequence, so samples in the
        # same batch would attend to each other.
        seq = x.unsqueeze(0)
        attn_output, _ = self.attention(seq, seq, seq)
        return attn_output.squeeze(0)
class DynamicFlashAttentionLayer(nn.Module):
    """Self-attention block applied independently to each row of a 2-D input.

    Despite the name, this is a plain ``nn.MultiheadAttention`` wrapper (default
    ``batch_first=False`` layout, i.e. ``(seq, batch, embed)``) for inputs shaped
    ``(batch, size)``; no flash-attention-specific code is used here.

    Args:
        size (int): Feature width; must be divisible by ``num_heads``.
        num_heads (int): Number of attention heads (default 8, as before).
    """

    def __init__(self, size: int, num_heads: int = 8):
        super().__init__()
        self.attention = nn.MultiheadAttention(size, num_heads=num_heads)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Attend over a length-1 sequence per sample and return ``(batch, size)``."""
        # The sequence axis is dim 0 in the (seq, batch, embed) layout. Inserting
        # it at dim 1 would make the batch act as the sequence, so samples in the
        # same batch would attend to each other.
        seq = x.unsqueeze(0)
        attn_output, _ = self.attention(seq, seq, seq)
        return attn_output.squeeze(0)
 def parse_xml_file(file_path: str) -> List[Dict[str, Any]]:
+    """
+    Parses an XML configuration file to extract layer parameters for neural network construction.
+    Args:
+        file_path (str): Path to the XML configuration file
+    Returns:
+        List[Dict[str, Any]]: List of dictionaries containing layer configurations
+    Raises:
+        ET.ParseError: If XML file is malformed
+        KeyError: If required attributes are missing in XML
+    """
     tree = ET.parse(file_path)
     root = tree.getroot()
     layers = []
+    for layer in root.findall('.//layer'):
+        layer_params = {}
+        layer_params['input_size'] = int(layer.get('input_size', 128))
+        layer_params['output_size'] = int(layer.get('output_size', 256))
+        layer_params['activation'] = layer.get('activation', 'relu').lower()
+        # Validate activation function
+        if layer_params['activation'] not in ['relu', 'tanh', 'sigmoid', 'none']:
+            raise ValueError(f"Unsupported activation function: {layer_params['activation']}")
+        # Validate dimensions
+        if layer_params['input_size'] <= 0 or layer_params['output_size'] <= 0:
+            raise ValueError("Layer dimensions must be positive integers")
+        layers.append(layer_params)
+    if not layers:
+        # Fallback to default configuration if no layers found
+        layers.append({
             'input_size': 128,
             'output_size': 256,
             'activation': 'relu'
+        })
     return layers
 def create_model_from_folder(folder_path: str) -> DynamicModel:
+    """
+    Creates a DynamicModel instance by parsing XML files in the specified folder structure.
+    Each subfolder represents a model section, and XML files within contain layer configurations.
+    The function recursively walks through the folder structure, processing all XML files to build
+    the model architecture.
+    Args:
+        folder_path (str): Path to the root folder containing XML configuration files
+    Returns:
+        DynamicModel: A configured neural network model based on the XML specifications
+    Raises:
+        FileNotFoundError: If the specified folder path doesn't exist
+        ET.ParseError: If XML parsing fails for any configuration file
+    """
     sections = defaultdict(list)
     if not os.path.exists(folder_path):
     return DynamicModel(dict(sections))
 def main():
+    """
+    Main function that demonstrates the creation and training of a dynamic PyTorch model.
+    This function:
+    1. Creates a dynamic model from XML configurations
+    2. Sets up distributed training environment using Accelerator
+    3. Configures optimization components (optimizer, loss function)
+    4. Creates synthetic dataset for demonstration
+    5. Implements distributed training loop with loss tracking
+    The model architecture is determined by XML files in the 'Xml_Data' folder,
+    where each subfolder represents a model section containing layer configurations.
+    """
+    folder_path = 'Xml_Data'
     model = create_model_from_folder(folder_path)
     print(f"Created dynamic PyTorch model with sections: {list(model.sections.keys())}")
+    # Dynamically determine input size from first layer configuration
     first_section = next(iter(model.sections.keys()))
     first_layer = model.sections[first_section][0]
     input_features = first_layer[0].in_features
+    # Validate model with sample input
     sample_input = torch.randn(1, input_features)
     output = model(sample_input)
     print(f"Sample output shape: {output.shape}")
+    # Initialize distributed training components
     accelerator = Accelerator()
+    # Configure training parameters and optimization components
     optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
     criterion = nn.CrossEntropyLoss()
     num_epochs = 10
+    # Generate synthetic dataset for demonstration purposes
     dataset = torch.utils.data.TensorDataset(
         torch.randn(100, input_features),
         torch.randint(0, 2, (100,))
         shuffle=True
     )
+    # Prepare model, optimizer, and dataloader for distributed training
     model, optimizer, train_dataloader = accelerator.prepare(
         model,
         optimizer,
         train_dataloader
     )
+    # Execute training loop with distributed processing
     for epoch in range(num_epochs):
         model.train()
         total_loss = 0
             accelerator.backward(loss)
             optimizer.step()
             total_loss += loss.item()
         avg_loss = total_loss / len(train_dataloader)
         print(f"Epoch {epoch+1}/{num_epochs}, Average Loss: {avg_loss:.4f}")