Spaces:

TencentARC
/

ImageConductor

Running on Zero

App Files Files Community

ImageConductor / peft /tuners /poly /config.py

Yw22

init demo

d711508 3 months ago

raw

history blame

3.78 kB

	# Copyright 2023-present the HuggingFace Inc. team.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	from dataclasses import dataclass, field
	from typing import List, Literal, Optional, Union

	from peft.config import PeftConfig
	from peft.utils import PeftType


	@dataclass
	class PolyConfig(PeftConfig):
	    """
	    This is the configuration class to store the configuration of a [`PolyModel`].
	        - [Polytropon (Poly)](https://arxiv.org/abs/2202.13914)
	        - [Multi-Head Routing (MHR)](https://arxiv.org/abs/2211.03831)

	    Args:
	        r (`int`): Attention dimension of each Lora in Poly.
	        target_modules (`Union[List[str],str]`): The names of the modules to apply Poly to. A list is
	            converted to a set in `__post_init__`; a string or `None` is stored unchanged.
	        modules_to_save (`List[str]`): List of modules apart from Poly layers to be set as trainable
	            and saved in the final checkpoint.
	        init_weights (bool): Whether to perform initialization of Poly weights.
	        poly_type (`Literal["poly"]`): The variant of the Poly module to use. Currently, only "poly"
	            is supported.
	        n_tasks (`int`): The number of tasks in a multitasking scenario.
	        n_skills (`int`): The number of skills (LoRA) in each Poly layer.
	        n_splits (`int`): The number of splits within each LoRA of a Poly layer. A value greater
	            than 1 indicates the use of Multi-Head Routing (MHR).
	    """

	    r: int = field(default=8, metadata={"help": "Lora attention dimension"})
	    target_modules: Optional[Union[List[str], str]] = field(
	        default=None,
	        metadata={
	            # The example regex uses plain `|` alternation: a backslash before the pipe would be an
	            # invalid escape sequence in a non-raw string literal.
	            "help": "List of module names or regex expression of the module names to replace with Poly. "
	            "For example, ['q', 'v'] or '.decoder.(SelfAttention|EncDecAttention).*(q|v)$' "
	        },
	    )
	    modules_to_save: Optional[List[str]] = field(
	        default=None,
	        metadata={
	            "help": "List of modules apart from Poly layers to be set as trainable and saved in the final checkpoint. "
	            "For example, in Sequence Classification or Token Classification tasks, "
	            "the final layer `classifier/score` are randomly initialized and as such need to be trainable and saved."
	        },
	    )
	    init_weights: bool = field(
	        default=True,
	        metadata={
	            "help": (
	                "Whether to initialize the weights of the Poly layers with their default initialization. Don't change "
	                "this setting, except if you know exactly what you're doing."
	            ),
	        },
	    )
	    poly_type: Literal["poly"] = field(
	        default="poly",
	        metadata={"help": 'Type of Poly modules to be used. Currently only "poly" is supported.'},
	    )
	    n_tasks: int = field(
	        default=1,
	        metadata={"help": "Number of tasks in multitasking scenario."},
	    )
	    n_skills: int = field(
	        default=4,
	        metadata={"help": "Number of skills (LoRA) in each Poly layer."},
	    )
	    n_splits: int = field(
	        default=1,
	        metadata={"help": "Number of splits within each LoRA of a Poly layer."},
	    )

	    def __post_init__(self):
	        """Mark this config as a Poly config and normalise `target_modules`.

	        Sets `peft_type` to `PeftType.POLY`. When `target_modules` was given as a list it is replaced
	        by a set of the same names; any other value (a string or `None`) is left as provided.
	        """
	        self.peft_type = PeftType.POLY
	        # Only lists are converted; a string must stay a string and must not be split into characters.
	        if isinstance(self.target_modules, list):
	            self.target_modules = set(self.target_modules)