from typing import List, Optional

from transformers import PretrainedConfig


class RZCompressionConfig(PretrainedConfig):
    """
    Configuration for the roberta_zinc embedding-compression models.

    Args:
        input_size (int): Dimension of the input embedding.
        compression_sizes (List[int]): One or more output dimensions.
        encoder_layers (int): Number of FeedForwardLayers in the encoder path.
        decoder_layers (int): Number of FeedForwardLayers in the optional decoder.
        dropout (float): Dropout probability in every layer except the final ones.
        layer_norm_eps (float | None): Epsilon for LayerNorm.
        mse_loss_weight (float): Weight for the MSE loss on base-to-compressed
            similarity matrices.
        pearson_loss_weight (float): Weight for the Pearson loss on
            base-to-compressed similarity matrices.
        topk_values (List[int]): Top-k values for weighting the MSE/Pearson losses.
        decoder_cosine_weight (float): Weight for the decoder cosine-similarity loss.
    """
    model_type = "roberta_zinc_compression_encoder"

    def __init__(
        self,
        # ── model params ─────────────────────────────────────────────
        input_size: int = 768,
        compression_sizes: List[int] = (32, 64, 128, 256, 512),
        encoder_layers: int = 2,
        decoder_layers: int = 2,
        dropout: float = 0.1,
        layer_norm_eps: Optional[float] = 1e-12,
        # ── loss knobs ───────────────────────────────────────────────
        mse_loss_weight: float = 0.0,
        pearson_loss_weight: float = 0.0,
        topk_values: List[int] = (10, 100),
        decoder_cosine_weight: float = 0.0,
        **kwargs,
    ):
        self.input_size = input_size
        self.compression_sizes = list(compression_sizes)
        self.encoder_layers = encoder_layers
        self.decoder_layers = decoder_layers
        self.dropout = dropout
        self.layer_norm_eps = layer_norm_eps
        self.mse_loss_weight = mse_loss_weight
        self.pearson_loss_weight = pearson_loss_weight
        self.topk_values = list(topk_values)
        self.decoder_cosine_weight = decoder_cosine_weight
        super().__init__(**kwargs)
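

if __name__ == "__main__":
    # Minimal usage sketch: construct a config and round-trip it through
    # the standard PretrainedConfig save_pretrained / from_pretrained API.
    # The argument values and the output path are illustrative only.
    config = RZCompressionConfig(
        compression_sizes=[64, 256],
        mse_loss_weight=1.0,
        pearson_loss_weight=1.0,
    )
    config.save_pretrained("./rz_compression_config")  # writes config.json
    reloaded = RZCompressionConfig.from_pretrained("./rz_compression_config")
    assert reloaded.compression_sizes == [64, 256]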