May I ask the environment info?

#14
by itaowe - opened

While trying to reproduce this wonderful work, I ran into an error that may come from having the wrong version of torch / transformers:

---------------------------------------------------------------------------
NotImplementedError                       Traceback (most recent call last)
/home/itaowe/SFR-Embedding-Code-2B_R/reproduce.ipynb Cell 1 line 5
      2 from transformers import AutoTokenizer, AutoModel
      4 # load model with tokenizer
----> 5 model = AutoModel.from_pretrained('/home/itaowe/SFR-Embedding-Code-2B_R', trust_remote_code=True)

File ~/anaconda3/envs/sfr_embedding/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py:564, in _BaseAutoModelClass.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
    562     cls.register(config.__class__, model_class, exist_ok=True)
    563     model_class = add_generation_mixin_to_remote_model(model_class)
--> 564     return model_class.from_pretrained(
    565         pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs
    566     )
    567 elif type(config) in cls._model_mapping.keys():
    568     model_class = _get_model_class(config, cls._model_mapping)

File ~/anaconda3/envs/sfr_embedding/lib/python3.10/site-packages/transformers/modeling_utils.py:279, in restore_default_torch_dtype.<locals>._wrapper(*args, **kwargs)
    277 old_dtype = torch.get_default_dtype()
    278 try:
--> 279     return func(*args, **kwargs)
    280 finally:
    281     torch.set_default_dtype(old_dtype)

File ~/anaconda3/envs/sfr_embedding/lib/python3.10/site-packages/transformers/modeling_utils.py:4342, in PreTrainedModel.from_pretrained(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, weights_only, *model_args, **kwargs)
   4336     config = cls._autoset_attn_implementation(
   4337         config, use_flash_attention_2=use_flash_attention_2, torch_dtype=torch_dtype, device_map=device_map
   4338     )
   4340 with ContextManagers(model_init_context):
   4341     # Let's make sure we don't run the init function of buffer modules
-> 4342     model = cls(config, *model_args, **model_kwargs)
   4344 # Make sure to tie the weights correctly
   4345 model.tie_weights()

File ~/.cache/huggingface/modules/transformers_modules/SFR-Embedding-Code-2B_R/modeling_gemma2.py:1346, in CodeXEmbedModel2B.__init__(self, config, **kwargs)
   1344 def __init__(self, config, **kwargs):
   1345     super().__init__(config)
-> 1346     self.model = Gemma2Model.from_pretrained(config._name_or_path, trust_remote_code=True, is_causal=False, device_map="auto")
   1347     self.tokenizer = AutoTokenizer.from_pretrained(config._name_or_path, trust_remote_code=True, device_map="auto")
   1349     if not self.tokenizer.pad_token:

File ~/anaconda3/envs/sfr_embedding/lib/python3.10/site-packages/transformers/modeling_utils.py:279, in restore_default_torch_dtype.<locals>._wrapper(*args, **kwargs)
    277 old_dtype = torch.get_default_dtype()
    278 try:
--> 279     return func(*args, **kwargs)
    280 finally:
    281     torch.set_default_dtype(old_dtype)

File ~/anaconda3/envs/sfr_embedding/lib/python3.10/site-packages/transformers/modeling_utils.py:4475, in PreTrainedModel.from_pretrained(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, weights_only, *model_args, **kwargs)
   4472         device_map_kwargs["offload_buffers"] = True
   4474     if not is_fsdp_enabled() and not is_deepspeed_zero3_enabled():
-> 4475         dispatch_model(model, **device_map_kwargs)
   4477 if hf_quantizer is not None:
   4478     hf_quantizer.postprocess_model(model, config=config)

File ~/anaconda3/envs/sfr_embedding/lib/python3.10/site-packages/accelerate/big_modeling.py:499, in dispatch_model(model, device_map, main_device, state_dict, offload_dir, offload_index, offload_buffers, skip_keys, preload_module_classes, force_hooks)
    497     device = f"musa:{device}"
    498 if device != "disk":
--> 499     model.to(device)
    500 else:
    501     raise ValueError(
    502         "You are trying to offload the whole model to the disk. Please use the `disk_offload` function instead."
    503     )

File ~/anaconda3/envs/sfr_embedding/lib/python3.10/site-packages/transformers/modeling_utils.py:3698, in PreTrainedModel.to(self, *args, **kwargs)
   3693     if dtype_present_in_args:
   3694         raise ValueError(
   3695             "You cannot cast a GPTQ model in a new `dtype`. Make sure to load the model using `from_pretrained` using the desired"
   3696             " `dtype` by passing the correct `torch_dtype` argument."
   3697         )
-> 3698 return super().to(*args, **kwargs)

File ~/anaconda3/envs/sfr_embedding/lib/python3.10/site-packages/torch/nn/modules/module.py:1340, in Module.to(self, *args, **kwargs)
   1337         else:
   1338             raise
-> 1340 return self._apply(convert)

File ~/anaconda3/envs/sfr_embedding/lib/python3.10/site-packages/torch/nn/modules/module.py:900, in Module._apply(self, fn, recurse)
    898 if recurse:
    899     for module in self.children():
--> 900         module._apply(fn)
    902 def compute_should_use_set_data(tensor, tensor_applied):
    903     if torch._has_compatible_shallow_copy_type(tensor, tensor_applied):
    904         # If the new tensor has compatible tensor type as the existing tensor,
    905         # the current behavior is to change the tensor in-place using `.data =`,
   (...)
    910         # global flag to let the user control whether they want the future
    911         # behavior of overwriting the existing tensor or not.

File ~/anaconda3/envs/sfr_embedding/lib/python3.10/site-packages/torch/nn/modules/module.py:927, in Module._apply(self, fn, recurse)
    923 # Tensors stored in modules are graph leaves, and we don't want to
    924 # track autograd history of `param_applied`, so we have to use
    925 # `with torch.no_grad():`
    926 with torch.no_grad():
--> 927     param_applied = fn(param)
    928 p_should_use_set_data = compute_should_use_set_data(param, param_applied)
    930 # subclasses may have multiple child tensors so we need to use swap_tensors

File ~/anaconda3/envs/sfr_embedding/lib/python3.10/site-packages/torch/nn/modules/module.py:1333, in Module.to.<locals>.convert(t)
   1331 except NotImplementedError as e:
   1332     if str(e) == "Cannot copy out of meta tensor; no data!":
-> 1333         raise NotImplementedError(
   1334             f"{e} Please use torch.nn.Module.to_empty() instead of torch.nn.Module.to() "
   1335             f"when moving module from meta to a different device."
   1336         ) from None
   1337     else:
   1338         raise

NotImplementedError: Cannot copy out of meta tensor; no data! Please use torch.nn.Module.to_empty() instead of torch.nn.Module.to() when moving module from meta to a different device.

I have fixed this issue by running pip install transformers==4.49.0. Newer versions of transformers do not seem to work.
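For anyone else hitting this, here is a minimal sketch of the setup that worked for me (assuming a local checkout of the model at the same path as in my traceback):

```python
# Pin transformers first (newer versions trigger the meta-tensor error above):
#   pip install transformers==4.49.0
from transformers import AutoTokenizer, AutoModel

model_path = '/home/itaowe/SFR-Embedding-Code-2B_R'  # local checkout of SFR-Embedding-Code-2B_R

# trust_remote_code is needed because the repo ships custom modeling code
# (modeling_gemma2.py); that custom class loads its own tokenizer internally.
model = AutoModel.from_pretrained(model_path, trust_remote_code=True)
```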
