# roberta_zinc_compression_encoder/configuration_roberta_zinc_compression_encoder.py
from typing import List, Optional

from transformers import PretrainedConfig


class RZCompressionConfig(PretrainedConfig):
"""
Configuration for the roberta_zinc embedding-compression models.
Args:
input_size (int): Dimension of the input embedding.
compression_sizes (List[int]): One or more output dimensions.
encoder_layers (int): Number of FeedForwardLayers in the encoder path.
decoder_layers (int): Number of FeedForwardLayers in the optional decoder.
dropout (float): Drop-out prob in every layer except the final ones.
layer_norm_eps (float | None): Epsilon for LayerNorm.
mse_loss_weight (float): Weight for MSE loss on base-to-compressed similarity matrices
pearson_loss_weight (float): Weight for Pearson loss on base-to-compressed similarity matrices
topk_values (List[int]): Top-k values for weighting mse/pearson loss
decoder_cosine_weight (float): weight for decoder cosine similarity loss
"""
model_type = "roberta_zinc_compression_encoder"
def __init__(
self,
# ── model params ─────────────────────────────────────────────
input_size: int = 768,
compression_sizes: List[int] = (32, 64, 128, 256, 512),
encoder_layers: int = 2,
decoder_layers: int = 2,
dropout: float = 0.1,
layer_norm_eps: Optional[float] = 1e-12,
# ── loss knobs ───────────────────────────────────────────────
mse_loss_weight: float = 0.0,
pearson_loss_weight: float = 0.0,
        topk_values: List[int] = (10, 100),
decoder_cosine_weight: float = 0.0,
**kwargs,
):
self.input_size = input_size
self.compression_sizes = list(compression_sizes)
self.encoder_layers = encoder_layers
self.decoder_layers = decoder_layers
self.dropout = dropout
self.layer_norm_eps = layer_norm_eps
self.mse_loss_weight = mse_loss_weight
        self.topk_values = list(topk_values)
self.pearson_loss_weight = pearson_loss_weight
self.decoder_cosine_weight = decoder_cosine_weight
super().__init__(**kwargs)
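

# A minimal usage sketch (not part of the original file): instantiate the
# config and round-trip it through the standard PretrainedConfig
# save_pretrained/from_pretrained serialization. The "./rz_compression"
# directory name is hypothetical.
if __name__ == "__main__":
    config = RZCompressionConfig(
        input_size=768,
        compression_sizes=[32, 64, 128],
        mse_loss_weight=1.0,
        pearson_loss_weight=1.0,
        topk_values=[10, 100],
    )
    # Write config.json to disk, then reload it.
    config.save_pretrained("./rz_compression")
    reloaded = RZCompressionConfig.from_pretrained("./rz_compression")
    assert reloaded.compression_sizes == [32, 64, 128]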