Spaces:

Princess3
/

python

Runtime error

App Files Community

Princess3 committed on Oct 28, 2024

Commit

775c6bb

verified ·

1 Parent(s): bf3b766

Upload model.py

Browse files

Files changed (1) hide show

model.py +2 -130

model.py CHANGED Viewed

@@ -9,38 +9,9 @@ from accelerate import Accelerator
 class DynamicModel(nn.Module):
     def __init__(self, sections: Dict[str, List[Dict[str, Any]]]):
-        """
-        Initialize the DynamicModel with configurable neural network sections.
-        Args:
-            sections (Dict[str, List[Dict[str, Any]]]): Dictionary mapping section names to lists of layer configurations.
-                Each layer configuration is a dictionary containing:
-                - input_size (int): Size of input features
-                - output_size (int): Size of output features
-                - activation (str, optional): Activation function name ('relu', 'tanh', 'sigmoid', etc.)
-                - dropout (float, optional): Dropout rate
-                - batch_norm (bool, optional): Whether to use batch normalization
-                - hidden_layers (List[Dict[str, Any]], optional): List of hidden layer configurations
-                - memory_augmentation (bool, optional): Whether to add a memory augmentation layer
-                - hybrid_attention (bool, optional): Whether to add a hybrid attention layer
-                - dynamic_flash_attention (bool, optional): Whether to add a dynamic flash attention layer
-        Example:
-            sections = {
-                'encoder': [
-                    {'input_size': 128, 'output_size': 256, 'activation': 'relu', 'batch_norm': True},
-                    {'input_size': 256, 'output_size': 512, 'activation': 'leaky_relu', 'dropout': 0.1}
-                ],
-                'decoder': [
-                    {'input_size': 512, 'output_size': 256, 'activation': 'elu'},
-                    {'input_size': 256, 'output_size': 128, 'activation': 'tanh'}
-                ]
-            }
-        """
         super(DynamicModel, self).__init__()
         self.sections = nn.ModuleDict()
-        # Default section configuration if none provided
         if not sections:
             sections = {
                 'default': [{
@@ -52,47 +23,19 @@ class DynamicModel(nn.Module):
                 }]
             }
-        # Initialize each section with its layer configurations
         for section_name, layers in sections.items():
             self.sections[section_name] = nn.ModuleList()
             for layer_params in layers:
                 self.sections[section_name].append(self.create_layer(layer_params))
     def create_layer(self, layer_params: Dict[str, Any]) -> nn.Module:
-        """
-        Creates a neural network layer based on provided parameters.
-        Args:
-            layer_params (Dict[str, Any]): Dictionary containing layer configuration
-                Required keys:
-                - input_size (int): Size of input features
-                - output_size (int): Size of output features
-                Optional keys:
-                - activation (str): Activation function name ('relu', 'tanh', 'sigmoid', None)
-                - dropout (float): Dropout rate if needed
-                - batch_norm (bool): Whether to use batch normalization
-                - hidden_layers (List[Dict[str, Any]]): List of hidden layer configurations
-                - memory_augmentation (bool): Whether to add a memory augmentation layer
-                - hybrid_attention (bool): Whether to add a hybrid attention layer
-                - dynamic_flash_attention (bool): Whether to add a dynamic flash attention layer
-        Returns:
-            nn.Module: Configured neural network layer with activation
-        Raises:
-            KeyError: If required parameters are missing
-            ValueError: If activation function is not supported
-        """
         layers = []
-        # Add linear layer
         layers.append(nn.Linear(layer_params['input_size'], layer_params['output_size']))
-        # Add batch normalization if specified
         if layer_params.get('batch_norm', False):
             layers.append(nn.BatchNorm1d(layer_params['output_size']))
-        # Add activation function
         activation = layer_params.get('activation', 'relu')
         if activation == 'relu':
             layers.append(nn.ReLU(inplace=True))
@@ -107,43 +50,25 @@ class DynamicModel(nn.Module):
         elif activation is not None:
             raise ValueError(f"Unsupported activation function: {activation}")
-        # Add dropout if specified
         if dropout_rate := layer_params.get('dropout', 0.0):
             layers.append(nn.Dropout(p=dropout_rate))
-        # Add hidden layers if specified
         if hidden_layers := layer_params.get('hidden_layers', []):
             for hidden_layer_params in hidden_layers:
                 layers.append(self.create_layer(hidden_layer_params))
-        # Add memory augmentation layer if specified
         if layer_params.get('memory_augmentation', False):
             layers.append(MemoryAugmentationLayer(layer_params['output_size']))
-        # Add hybrid attention layer if specified
         if layer_params.get('hybrid_attention', False):
             layers.append(HybridAttentionLayer(layer_params['output_size']))
-        # Add dynamic flash attention layer if specified
         if layer_params.get('dynamic_flash_attention', False):
             layers.append(DynamicFlashAttentionLayer(layer_params['output_size']))
         return nn.Sequential(*layers)
     def forward(self, x: torch.Tensor, section_name: Optional[str] = None) -> torch.Tensor:
-        """
-        Forward pass through the dynamic model architecture.
-        Args:
-            x (torch.Tensor): Input tensor to process
-            section_name (Optional[str]): Specific section to process. If None, processes all sections
-        Returns:
-            torch.Tensor: Processed output tensor
-        Raises:
-            KeyError: If specified section_name doesn't exist
-        """
         if section_name is not None:
             if section_name not in self.sections:
                 raise KeyError(f"Section '{section_name}' not found in model")
@@ -184,19 +109,6 @@ class DynamicFlashAttentionLayer(nn.Module):
         return attn_output.squeeze(1)
 def parse_xml_file(file_path: str) -> List[Dict[str, Any]]:
-    """
-    Parses an XML configuration file to extract layer parameters for neural network construction.
-    Args:
-        file_path (str): Path to the XML configuration file
-    Returns:
-        List[Dict[str, Any]]: List of dictionaries containing layer configurations
-    Raises:
-        ET.ParseError: If XML file is malformed
-        KeyError: If required attributes are missing in XML
-    """
     tree = ET.parse(file_path)
     root = tree.getroot()
@@ -207,18 +119,15 @@ def parse_xml_file(file_path: str) -> List[Dict[str, Any]]:
         layer_params['output_size'] = int(layer.get('output_size', 256))
         layer_params['activation'] = layer.get('activation', 'relu').lower()
-        # Validate activation function
         if layer_params['activation'] not in ['relu', 'tanh', 'sigmoid', 'none']:
             raise ValueError(f"Unsupported activation function: {layer_params['activation']}")
-        # Validate dimensions
         if layer_params['input_size'] <= 0 or layer_params['output_size'] <= 0:
             raise ValueError("Layer dimensions must be positive integers")
         layers.append(layer_params)
     if not layers:
-        # Fallback to default configuration if no layers found
         layers.append({
             'input_size': 128,
             'output_size': 256,
@@ -228,23 +137,6 @@ def parse_xml_file(file_path: str) -> List[Dict[str, Any]]:
     return layers
 def create_model_from_folder(folder_path: str) -> DynamicModel:
-    """
-    Creates a DynamicModel instance by parsing XML files in the specified folder structure.
-    Each subfolder represents a model section, and XML files within contain layer configurations.
-    The function recursively walks through the folder structure, processing all XML files to build
-    the model architecture.
-    Args:
-        folder_path (str): Path to the root folder containing XML configuration files
-    Returns:
-        DynamicModel: A configured neural network model based on the XML specifications
-    Raises:
-        FileNotFoundError: If the specified folder path doesn't exist
-        ET.ParseError: If XML parsing fails for any configuration file
-    """
     sections = defaultdict(list)
     if not os.path.exists(folder_path):
@@ -259,7 +151,7 @@ def create_model_from_folder(folder_path: str) -> DynamicModel:
                 file_path = os.path.join(root, file)
                 try:
                     layers = parse_xml_file(file_path)
-                    section_name = os.path.basename(root).replace('.', '_')  # Replace periods with underscores
                     sections[section_name].extend(layers)
                 except Exception as e:
                     print(f"Error processing {file_path}: {str(e)}")
@@ -271,43 +163,25 @@ def create_model_from_folder(folder_path: str) -> DynamicModel:
     return DynamicModel(dict(sections))
 def main():
-    """
-    Main function that demonstrates the creation and training of a dynamic PyTorch model.
-    This function:
-    1. Creates a dynamic model from XML configurations
-    2. Sets up distributed training environment using Accelerator
-    3. Configures optimization components (optimizer, loss function)
-    4. Creates synthetic dataset for demonstration
-    5. Implements distributed training loop with loss tracking
-    The model architecture is determined by XML files in the 'Xml_Data' folder,
-    where each subfolder represents a model section containing layer configurations.
-    """
     folder_path = 'data'
     model = create_model_from_folder(folder_path)
     print(f"Created dynamic PyTorch model with sections: {list(model.sections.keys())}")
-    # Dynamically determine input size from first layer configuration
     first_section = next(iter(model.sections.keys()))
     first_layer = model.sections[first_section][0]
     input_features = first_layer[0].in_features
-    # Validate model with sample input
     sample_input = torch.randn(1, input_features)
     output = model(sample_input)
     print(f"Sample output shape: {output.shape}")
-    # Initialize distributed training components
     accelerator = Accelerator()
-    # Configure training parameters and optimization components
     optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
     criterion = nn.CrossEntropyLoss()
     num_epochs = 10
-    # Generate synthetic dataset for demonstration purposes
     dataset = torch.utils.data.TensorDataset(
         torch.randn(100, input_features),
         torch.randint(0, 2, (100,))
@@ -318,14 +192,12 @@ def main():
         shuffle=True
     )
-    # Prepare model, optimizer, and dataloader for distributed training
     model, optimizer, train_dataloader = accelerator.prepare(
         model,
         optimizer,
         train_dataloader
     )
-    # Execute training loop with distributed processing
     for epoch in range(num_epochs):
         model.train()
         total_loss = 0

 class DynamicModel(nn.Module):
     def __init__(self, sections: Dict[str, List[Dict[str, Any]]]):
         super(DynamicModel, self).__init__()
         self.sections = nn.ModuleDict()
         if not sections:
             sections = {
                 'default': [{
                 }]
             }
         for section_name, layers in sections.items():
             self.sections[section_name] = nn.ModuleList()
             for layer_params in layers:
+                print(f"Creating layer in section '{section_name}' with params: {layer_params}")
                 self.sections[section_name].append(self.create_layer(layer_params))
     def create_layer(self, layer_params: Dict[str, Any]) -> nn.Module:
         layers = []
         layers.append(nn.Linear(layer_params['input_size'], layer_params['output_size']))
         if layer_params.get('batch_norm', False):
             layers.append(nn.BatchNorm1d(layer_params['output_size']))
         activation = layer_params.get('activation', 'relu')
         if activation == 'relu':
             layers.append(nn.ReLU(inplace=True))
         elif activation is not None:
             raise ValueError(f"Unsupported activation function: {activation}")
         if dropout_rate := layer_params.get('dropout', 0.0):
             layers.append(nn.Dropout(p=dropout_rate))
         if hidden_layers := layer_params.get('hidden_layers', []):
             for hidden_layer_params in hidden_layers:
                 layers.append(self.create_layer(hidden_layer_params))
         if layer_params.get('memory_augmentation', False):
             layers.append(MemoryAugmentationLayer(layer_params['output_size']))
         if layer_params.get('hybrid_attention', False):
             layers.append(HybridAttentionLayer(layer_params['output_size']))
         if layer_params.get('dynamic_flash_attention', False):
             layers.append(DynamicFlashAttentionLayer(layer_params['output_size']))
         return nn.Sequential(*layers)
     def forward(self, x: torch.Tensor, section_name: Optional[str] = None) -> torch.Tensor:
         if section_name is not None:
             if section_name not in self.sections:
                 raise KeyError(f"Section '{section_name}' not found in model")
         return attn_output.squeeze(1)
 def parse_xml_file(file_path: str) -> List[Dict[str, Any]]:
     tree = ET.parse(file_path)
     root = tree.getroot()
         layer_params['output_size'] = int(layer.get('output_size', 256))
         layer_params['activation'] = layer.get('activation', 'relu').lower()
         if layer_params['activation'] not in ['relu', 'tanh', 'sigmoid', 'none']:
             raise ValueError(f"Unsupported activation function: {layer_params['activation']}")
         if layer_params['input_size'] <= 0 or layer_params['output_size'] <= 0:
             raise ValueError("Layer dimensions must be positive integers")
         layers.append(layer_params)
     if not layers:
         layers.append({
             'input_size': 128,
             'output_size': 256,
     return layers
 def create_model_from_folder(folder_path: str) -> DynamicModel:
     sections = defaultdict(list)
     if not os.path.exists(folder_path):
                 file_path = os.path.join(root, file)
                 try:
                     layers = parse_xml_file(file_path)
+                    section_name = os.path.basename(root).replace('.', '_')
                     sections[section_name].extend(layers)
                 except Exception as e:
                     print(f"Error processing {file_path}: {str(e)}")
     return DynamicModel(dict(sections))
 def main():
     folder_path = 'data'
     model = create_model_from_folder(folder_path)
     print(f"Created dynamic PyTorch model with sections: {list(model.sections.keys())}")
     first_section = next(iter(model.sections.keys()))
     first_layer = model.sections[first_section][0]
     input_features = first_layer[0].in_features
     sample_input = torch.randn(1, input_features)
     output = model(sample_input)
     print(f"Sample output shape: {output.shape}")
     accelerator = Accelerator()
     optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
     criterion = nn.CrossEntropyLoss()
     num_epochs = 10
     dataset = torch.utils.data.TensorDataset(
         torch.randn(100, input_features),
         torch.randint(0, 2, (100,))
         shuffle=True
     )
     model, optimizer, train_dataloader = accelerator.prepare(
         model,
         optimizer,
         train_dataloader
     )
     for epoch in range(num_epochs):
         model.train()
         total_loss = 0