Update README.md
Browse files
README.md
CHANGED
@@ -412,6 +412,27 @@ we do not have enough resources to evaluate the model
412
413 ### Generate the model
414
415 ~~~python
416 import torch
417 from transformers import AutoModelForCausalLM, AutoTokenizer
|
412
413 ### Generate the model
414
415 + **1 add metadata to bf16 model** https://huggingface.co/opensourcerelease/DeepSeek-R1-bf16
416 +
417 + ~~~python
418 + import safetensors
419 + from safetensors.torch import save_file
420 +
421 + for i in range(1, 164):
422 +     idx_str = "0" * (5-len(str(i))) + str(i)
423 +     safetensors_path = f"model-{idx_str}-of-000163.safetensors"
424 +     print(safetensors_path)
425 +     tensors = dict()
426 +     with safetensors.safe_open(safetensors_path, framework="pt") as f:
427 +         for key in f.keys():
428 +             tensors[key] = f.get_tensor(key)
429 +     save_file(tensors, safetensors_path, metadata={'format': 'pt'})
430 + ~~~
431 +
432 +
433 +
434 + **2 remove torch.no_grad** in modeling_deepseek.py, as we need some tuning in AutoRound.
435 +
436 ~~~python
437 import torch
438 from transformers import AutoModelForCausalLM, AutoTokenizer