|
from typing import List, Optional |
|
from transformers import PretrainedConfig |
|
|
|
|
|
class RZCompressionConfig(PretrainedConfig):
    """
    Configuration for the roberta_zinc embedding-compression models.

    Args:
        input_size (int): Dimension of the input embedding.
        compression_sizes (List[int]): One or more output dimensions.
        encoder_layers (int): Number of FeedForwardLayers in the encoder path.
        decoder_layers (int): Number of FeedForwardLayers in the optional decoder.
        dropout (float): Drop-out prob in every layer except the final ones.
        layer_norm_eps (float | None): Epsilon for LayerNorm.
        mse_loss_weight (float): Weight for MSE loss on base-to-compressed similarity matrices.
        pearson_loss_weight (float): Weight for Pearson loss on base-to-compressed similarity matrices.
        topk_values (List[int]): Top-k values for weighting mse/pearson loss.
        decoder_cosine_weight (float): Weight for decoder cosine similarity loss.
    """

    model_type = "roberta_zinc_compression_encoder"

    def __init__(
        self,
        input_size: int = 768,
        compression_sizes: List[int] = (32, 64, 128, 256, 512),
        encoder_layers: int = 2,
        decoder_layers: int = 2,
        dropout: float = 0.1,
        layer_norm_eps: Optional[float] = 1e-12,
        mse_loss_weight: float = 0.0,
        pearson_loss_weight: float = 0.0,
        topk_values: List[int] = (10, 100),
        decoder_cosine_weight: float = 0.0,
        **kwargs,
    ):
        self.input_size = input_size
        # Normalize sequence-valued fields to lists so the config round-trips
        # cleanly through JSON serialization (tuples deserialize as lists, which
        # would otherwise break config equality checks).
        self.compression_sizes = list(compression_sizes)
        self.encoder_layers = encoder_layers
        self.decoder_layers = decoder_layers
        self.dropout = dropout
        self.layer_norm_eps = layer_norm_eps
        self.mse_loss_weight = mse_loss_weight
        self.topk_values = list(topk_values)
        self.pearson_loss_weight = pearson_loss_weight
        self.decoder_cosine_weight = decoder_cosine_weight
        # PretrainedConfig consumes remaining kwargs (e.g. name_or_path,
        # torch_dtype); called last so our explicit fields are set first,
        # matching the original initialization order.
        super().__init__(**kwargs)