# File size: 794 Bytes
# commit 752f635
import torch
from transformers import AutoModel, AutoTokenizer, FlaxAutoModel
from datasets import load_dataset
from wechsel import WECHSEL, load_embeddings
# Initialize a Finnish model from English roberta-large using WECHSEL:
# transfer the input embeddings to the target tokenizer's vocabulary,
# then save both PyTorch and Flax checkpoints to the current directory.

source_tokenizer = AutoTokenizer.from_pretrained("roberta-large")
model = AutoModel.from_pretrained("roberta-large")
# NOTE(review): assumes a target-language tokenizer was already trained and
# saved in "./" — confirm before running.
target_tokenizer = AutoTokenizer.from_pretrained("./")

wechsel = WECHSEL(
    load_embeddings("en"),           # word embeddings for the source language
    load_embeddings("fi"),           # word embeddings for the target language
    bilingual_dictionary="finnish",  # en<->fi dictionary used for alignment
)

# Compute embeddings for the target vocabulary from the source model's
# input-embedding matrix; `info` carries diagnostics from the transfer.
target_embeddings, info = wechsel.apply(
    source_tokenizer,
    target_tokenizer,
    model.get_input_embeddings().weight.detach().numpy(),
)

# Resize first so that the embedding module's num_embeddings and the model
# config's vocab_size match the target tokenizer. Assigning .weight.data
# alone would leave both stale whenever the target vocabulary size differs
# from roberta-large's, and the saved checkpoint would be inconsistent.
model.resize_token_embeddings(len(target_tokenizer))
embeddings = model.get_input_embeddings()
# Cast to the model's existing dtype: torch.from_numpy keeps the numpy
# array's dtype, which may not match the model's parameters.
embeddings.weight.data = torch.from_numpy(target_embeddings).to(
    embeddings.weight.dtype
)

# Persist the transferred PyTorch model, then export a Flax copy from it.
model.save_pretrained("./")
flax_model = FlaxAutoModel.from_pretrained("./", from_pt=True)
flax_model.save_pretrained("./")