May I ask for the environment info?
#14 · opened by itaowe
While trying to reproduce this wonderful work, I ran into an error that may come from wrong versions of torch / transformers.
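For reference, here is the notebook cell I am running, reconstructed from the traceback below (the path is my local copy of the model):

```python
from transformers import AutoTokenizer, AutoModel

# load model with tokenizer
model = AutoModel.from_pretrained('/home/itaowe/SFR-Embedding-Code-2B_R', trust_remote_code=True)
```

Running this cell raises: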
```text
---------------------------------------------------------------------------
NotImplementedError Traceback (most recent call last)
/home/itaowe/SFR-Embedding-Code-2B_R/reproduce.ipynb Cell 1 line 5
2 from transformers import AutoTokenizer, AutoModel
4 # load model with tokenizer
----> 5 model = AutoModel.from_pretrained('/home/itaowe/SFR-Embedding-Code-2B_R', trust_remote_code=True)
File ~/anaconda3/envs/sfr_embedding/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py:564, in _BaseAutoModelClass.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
562 cls.register(config.__class__, model_class, exist_ok=True)
563 model_class = add_generation_mixin_to_remote_model(model_class)
--> 564 return model_class.from_pretrained(
565 pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs
566 )
567 elif type(config) in cls._model_mapping.keys():
568 model_class = _get_model_class(config, cls._model_mapping)
File ~/anaconda3/envs/sfr_embedding/lib/python3.10/site-packages/transformers/modeling_utils.py:279, in restore_default_torch_dtype.<locals>._wrapper(*args, **kwargs)
277 old_dtype = torch.get_default_dtype()
278 try:
--> 279 return func(*args, **kwargs)
280 finally:
281 torch.set_default_dtype(old_dtype)
File ~/anaconda3/envs/sfr_embedding/lib/python3.10/site-packages/transformers/modeling_utils.py:4342, in PreTrainedModel.from_pretrained(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, weights_only, *model_args, **kwargs)
4336 config = cls._autoset_attn_implementation(
4337 config, use_flash_attention_2=use_flash_attention_2, torch_dtype=torch_dtype, device_map=device_map
4338 )
4340 with ContextManagers(model_init_context):
4341 # Let's make sure we don't run the init function of buffer modules
-> 4342 model = cls(config, *model_args, **model_kwargs)
4344 # Make sure to tie the weights correctly
4345 model.tie_weights()
File ~/.cache/huggingface/modules/transformers_modules/SFR-Embedding-Code-2B_R/modeling_gemma2.py:1346, in CodeXEmbedModel2B.__init__(self, config, **kwargs)
1344 def __init__(self, config, **kwargs):
1345 super().__init__(config)
-> 1346 self.model = Gemma2Model.from_pretrained(config._name_or_path, trust_remote_code=True, is_causal=False, device_map="auto")
1347 self.tokenizer = AutoTokenizer.from_pretrained(config._name_or_path, trust_remote_code=True, device_map="auto")
1349 if not self.tokenizer.pad_token:
File ~/anaconda3/envs/sfr_embedding/lib/python3.10/site-packages/transformers/modeling_utils.py:279, in restore_default_torch_dtype.<locals>._wrapper(*args, **kwargs)
277 old_dtype = torch.get_default_dtype()
278 try:
--> 279 return func(*args, **kwargs)
280 finally:
281 torch.set_default_dtype(old_dtype)
File ~/anaconda3/envs/sfr_embedding/lib/python3.10/site-packages/transformers/modeling_utils.py:4475, in PreTrainedModel.from_pretrained(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, weights_only, *model_args, **kwargs)
4472 device_map_kwargs["offload_buffers"] = True
4474 if not is_fsdp_enabled() and not is_deepspeed_zero3_enabled():
-> 4475 dispatch_model(model, **device_map_kwargs)
4477 if hf_quantizer is not None:
4478 hf_quantizer.postprocess_model(model, config=config)
File ~/anaconda3/envs/sfr_embedding/lib/python3.10/site-packages/accelerate/big_modeling.py:499, in dispatch_model(model, device_map, main_device, state_dict, offload_dir, offload_index, offload_buffers, skip_keys, preload_module_classes, force_hooks)
497 device = f"musa:{device}"
498 if device != "disk":
--> 499 model.to(device)
500 else:
501 raise ValueError(
502 "You are trying to offload the whole model to the disk. Please use the `disk_offload` function instead."
503 )
File ~/anaconda3/envs/sfr_embedding/lib/python3.10/site-packages/transformers/modeling_utils.py:3698, in PreTrainedModel.to(self, *args, **kwargs)
3693 if dtype_present_in_args:
3694 raise ValueError(
3695 "You cannot cast a GPTQ model in a new `dtype`. Make sure to load the model using `from_pretrained` using the desired"
3696 " `dtype` by passing the correct `torch_dtype` argument."
3697 )
-> 3698 return super().to(*args, **kwargs)
File ~/anaconda3/envs/sfr_embedding/lib/python3.10/site-packages/torch/nn/modules/module.py:1340, in Module.to(self, *args, **kwargs)
1337 else:
1338 raise
-> 1340 return self._apply(convert)
File ~/anaconda3/envs/sfr_embedding/lib/python3.10/site-packages/torch/nn/modules/module.py:900, in Module._apply(self, fn, recurse)
898 if recurse:
899 for module in self.children():
--> 900 module._apply(fn)
902 def compute_should_use_set_data(tensor, tensor_applied):
903 if torch._has_compatible_shallow_copy_type(tensor, tensor_applied):
904 # If the new tensor has compatible tensor type as the existing tensor,
905 # the current behavior is to change the tensor in-place using `.data =`,
(...)
910 # global flag to let the user control whether they want the future
911 # behavior of overwriting the existing tensor or not.
File ~/anaconda3/envs/sfr_embedding/lib/python3.10/site-packages/torch/nn/modules/module.py:927, in Module._apply(self, fn, recurse)
923 # Tensors stored in modules are graph leaves, and we don't want to
924 # track autograd history of `param_applied`, so we have to use
925 # `with torch.no_grad():`
926 with torch.no_grad():
--> 927 param_applied = fn(param)
928 p_should_use_set_data = compute_should_use_set_data(param, param_applied)
930 # subclasses may have multiple child tensors so we need to use swap_tensors
File ~/anaconda3/envs/sfr_embedding/lib/python3.10/site-packages/torch/nn/modules/module.py:1333, in Module.to.<locals>.convert(t)
1331 except NotImplementedError as e:
1332 if str(e) == "Cannot copy out of meta tensor; no data!":
-> 1333 raise NotImplementedError(
1334 f"{e} Please use torch.nn.Module.to_empty() instead of torch.nn.Module.to() "
1335 f"when moving module from meta to a different device."
1336 ) from None
1337 else:
1338 raise
NotImplementedError: Cannot copy out of meta tensor; no data! Please use torch.nn.Module.to_empty() instead of torch.nn.Module.to() when moving module from meta to a different device.
```
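Could you share which torch / transformers versions this model was tested with? For comparison, a minimal sketch to print the versions of the libraries that appear in the traceback:

```python
# Print the versions of the libraries involved in the traceback above.
import torch
import transformers
import accelerate

print("torch:", torch.__version__)
print("transformers:", transformers.__version__)
print("accelerate:", accelerate.__version__)
```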
I have fixed this issue with `pip install transformers==4.49.0`; newer versions of transformers do not seem to work. My guess is that newer releases initialize the outer model on the meta device first, so the nested `Gemma2Model.from_pretrained(..., device_map="auto")` call inside `CodeXEmbedModel2B.__init__` ends up asking `dispatch_model` to move parameter-less meta tensors with `.to()`, which raises the `NotImplementedError` above.
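After pinning, the same load call goes through for me. A quick sanity check (a sketch; the path is local to my machine, and the expected class name is taken from the custom modeling file in the traceback above):

```python
import transformers
# The pin that fixed it for me; newer versions hit the meta-tensor error.
assert transformers.__version__ == "4.49.0", transformers.__version__

from transformers import AutoModel

# Same call as in the failing notebook cell; it no longer raises NotImplementedError.
model = AutoModel.from_pretrained('/home/itaowe/SFR-Embedding-Code-2B_R', trust_remote_code=True)
print(type(model).__name__)  # expected: CodeXEmbedModel2B
```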