Skip to content

config

optimus_dl.recipe.metrics.config

Configuration for metrics evaluation recipe.

MetricsConfig dataclass

Bases: RegistryConfigStrict

Complete metrics evaluation configuration.

Parameters:

Name Type Description Default
args dict

Free-form dictionary of additional arguments passed through to the recipe.

<class 'dict'>
common MetricsRecipeConfig

Configuration for metrics evaluation recipe common settings.

<dynamic>
model ModelConfig | None
None
data DataConfig
'???'
criterion CriterionConfig
'???'
metrics dict[str, list[dict]]

Metric configurations mapped by dataset name

<class 'dict'>
model_transforms list[ModelTransformConfig]

List of model transforms to apply

<dynamic>
loggers list[MetricsLoggerConfig] | None
None
model_builder Any
ModelBuilderConfig(_name='base')
criterion_builder Any
CriterionBuilderConfig(_name='base')
data_builder Any
DataBuilderConfig(_name='base')
checkpoint_manager Any
CheckpointManagerConfig(_name='base')
logger_manager Any
LoggerManagerConfig(_name='base')
evaluator Any
EvaluatorConfig(_name='base')
Source code in optimus_dl/recipe/metrics/config.py
@dataclass
class MetricsConfig(RegistryConfigStrict):
    """Complete metrics evaluation configuration.

    Top-level config for the metrics evaluation recipe: selects the data,
    criterion, and (optional) model, declares per-dataset metric
    configurations, and wires the builder/manager configs used for
    dependency injection.
    """

    # Free-form extra arguments forwarded to the recipe.
    args: dict = field(default_factory=dict)
    # Common recipe settings (experiment name, seeds, output paths,
    # distributed setup, ...); see MetricsRecipeConfig.
    common: MetricsRecipeConfig = field(default_factory=MetricsRecipeConfig)

    # Optional model configuration; None means no model config is supplied.
    model: ModelConfig | None = field(default=None)
    # NOTE(review): MISSING renders as '???', so these two appear to be
    # required fields resolved at config-load time — confirm this is
    # omegaconf's MISSING sentinel rather than dataclasses.MISSING.
    data: DataConfig = field(default=MISSING)
    criterion: CriterionConfig = field(default=MISSING)

    # Metrics configuration for MetricEngine, mapped by dataset name.
    # Each dataset name maps to a list of raw metric config dicts.
    metrics: dict[str, list[dict]] = field(
        default_factory=dict,
        metadata={"description": "Metric configurations mapped by dataset name"},
    )

    # Model transforms configuration, applied in list order.
    model_transforms: list[ModelTransformConfig] = field(
        default_factory=list,
        metadata={"description": "List of model transforms to apply"},
    )

    # Logging: None disables metric loggers entirely.
    loggers: list[MetricsLoggerConfig] | None = field(default=None)

    # Dependency Injection Configs — each defaults to the "base"
    # registry implementation; typed Any so alternate registry entries
    # with different config classes can be substituted.
    model_builder: Any = field(default_factory=lambda: ModelBuilderConfig(_name="base"))
    criterion_builder: Any = field(
        default_factory=lambda: CriterionBuilderConfig(_name="base")
    )
    data_builder: Any = field(default_factory=lambda: DataBuilderConfig(_name="base"))
    checkpoint_manager: Any = field(
        default_factory=lambda: CheckpointManagerConfig(_name="base")
    )
    logger_manager: Any = field(
        default_factory=lambda: LoggerManagerConfig(_name="base")
    )
    evaluator: Any = field(default_factory=lambda: EvaluatorConfig(_name="base"))

MetricsRecipeConfig dataclass

Configuration for metrics evaluation recipe common settings.

Parameters:

Name Type Description Default
name str

Experiment name for loggers

'metrics-eval'
seed int
42
data_seed int
42
output_path str

Base directory for outputs (logs, etc.)

'outputs/metrics'
checkpoint_path str | None

Path to checkpoint to load from

None
use_gpu bool
True
distributed DistributedConfig

Configuration for distributed training topologies.

Attributes: `tp_size` — degree of Tensor Parallelism (number of GPUs to shard each layer across); `sharding_world_size` — size of FSDP sharding groups (if None, defaults to the number of GPUs per node, i.e. intra-node sharding).

<dynamic>
max_iterations int | None

Max number of batches to process per dataset

None
tokenizer Any

Tokenizer configuration

None
Source code in optimus_dl/recipe/metrics/config.py
@dataclass
class MetricsRecipeConfig:
    """Configuration for metrics evaluation recipe common settings.

    Groups the run-level knobs shared across the metrics recipe:
    experiment naming, reproducibility seeds, output/checkpoint paths,
    distributed execution, an optional iteration cap, and the tokenizer
    configuration.
    """

    # Experiment name, used to label logger output.
    name: str = field(
        default="metrics-eval",
        metadata={"description": "Experiment name for loggers"},
    )

    # Reproducibility: separate seeds for general RNG and data pipeline.
    seed: int = field(default=42)
    data_seed: int = field(default=42)

    # Output: base directory for logs and other artifacts.
    output_path: str = field(
        default="outputs/metrics",
        metadata={"description": "Base directory for outputs (logs, etc.)"},
    )

    # Checkpointing: None means no checkpoint is loaded.
    checkpoint_path: str | None = field(
        default=None,
        metadata={"description": "Path to checkpoint to load from"},
    )

    # Distributed: GPU toggle plus topology (TP degree, FSDP sharding).
    use_gpu: bool = True
    distributed: DistributedConfig = field(default_factory=DistributedConfig)

    # Evaluation limit: None means iterate each dataset to exhaustion.
    max_iterations: int | None = field(
        default=None,
        metadata={"description": "Max number of batches to process per dataset"},
    )

    # Tokenizer configuration (typed Any; concrete schema depends on the
    # selected tokenizer — presumably a registry config, confirm at caller).
    tokenizer: Any = field(
        default=None, metadata={"description": "Tokenizer configuration"}
    )