# DTO definitions for Hugging Face model conversion / quantization requests.
from abc import ABC
from typing import Optional

from pydantic import BaseModel, Field
### DTO Definitions
class QuantizationConfig(ABC, BaseModel):
    """Abstract marker base for backend-specific quantization settings."""
class ConvertRequest(ABC, BaseModel):
    """Abstract base payload for converting a Hugging Face model.

    Concrete subclasses add a backend-specific ``quantization_config`` field.
    """

    # Required: source model identifier on the Hugging Face Hub.
    hf_model_name: str
    hf_tokenizer_name: Optional[str] = Field(None, description="Hugging Face tokenizer name. Defaults to hf_model_name")
    hf_token: Optional[str] = Field(None, description="Hugging Face token for private models")
    hf_push_repo: Optional[str] = Field(None, description="Hugging Face repo to push the converted model. If not provided, the model will be downloaded only.")
### -------
### Quantization Configurations
class AWQQuantizationConfig(QuantizationConfig):
    """Tunable parameters for AWQ (activation-aware weight quantization)."""

    zero_point: Optional[bool] = Field(True, description="Use zero point quantization")
    q_group_size: Optional[int] = Field(128, description="Quantization group size")
    w_bit: Optional[int] = Field(4, description="Weight bit")
    version: Optional[str] = Field("GEMM", description="Quantization version")
class GPTQQuantizationConfig(QuantizationConfig):
    """GPTQ quantization settings — no tunable options exposed yet."""
class GGUFQuantizationConfig(QuantizationConfig):
    """GGUF quantization settings — no tunable options exposed yet."""
class AWQConvertionRequest(ConvertRequest):
    """Conversion request targeting the AWQ backend."""

    # The class itself serves as the default factory; a fresh config with
    # all-default values is built when the caller omits this field.
    quantization_config: Optional[AWQQuantizationConfig] = Field(
        default_factory=AWQQuantizationConfig,
        description="AWQ quantization configuration",
    )
class GPTQConvertionRequest(ConvertRequest):
    """Conversion request targeting the GPTQ backend."""

    # The class itself serves as the default factory; a fresh config with
    # all-default values is built when the caller omits this field.
    quantization_config: Optional[GPTQQuantizationConfig] = Field(
        default_factory=GPTQQuantizationConfig,
        description="GPTQ quantization configuration",
    )
class GGUFConvertionRequest(ConvertRequest):
    """Conversion request targeting the GGUF backend."""

    # The class itself serves as the default factory; a fresh config with
    # all-default values is built when the caller omits this field.
    quantization_config: Optional[GGUFQuantizationConfig] = Field(
        default_factory=GGUFQuantizationConfig,
        description="GGUF quantization configuration",
    )
### -------