The GGUF is not compatible with llama.cpp yet.
llama-cli -hf microsoft/bitnet-b1.58-2B-4T-gguf
common_download_file_single: previous metadata file found /Volumes/M2_4TB/llama_cache/microsoft_bitnet-b1.58-2B-4T-gguf_ggml-model-i2_s.gguf.json: {"etag":""25796939808aceffb931ae1fe1e84104-116"","lastModified":"Tue, 15 Apr 2025 04:54:28 GMT","url":"https://huggingface.co/microsoft/bitnet-b1.58-2B-4T-gguf/resolve/main/ggml-model-i2_s.gguf"}
curl_perform_with_retry: Trying to download from https://huggingface.co/microsoft/bitnet-b1.58-2B-4T-gguf/resolve/main/ggml-model-i2_s.gguf (attempt 1 of 3)...
gguf_init_from_file_impl: tensor 'blk.0.ffn_down.weight' of type 36 (TYPE_IQ4_NL_4_4 REMOVED, use IQ4_NL with runtime repacking) has 6912 elements per row, not a multiple of block size (0)
gguf_init_from_file_impl: failed to read tensor info
common_download_model: failed to load input GGUF from /Volumes/M2_4TB/llama_cache/microsoft_bitnet-b1.58-2B-4T-gguf_ggml-model-i2_s.gguf
error: failed to download model from https://huggingface.co/microsoft/bitnet-b1.58-2B-4T-gguf/resolve/main/ggml-model-i2_s.gguf
Running the GGUF requires the latest version of BitNet. Support was added last month ("add support for bitnet2b_2501").
You will not get the performance enhancements unless you optimize the inference for your device type and use the latest version. Native llama.cpp support will probably come soon; it's just not intended to run like that out of the box.
Super-fast responses running locally on an Intel i9 CPU using BitNet.
I'm getting this error when running your code:
The repository for microsoft/bitnet-b1.58-2B-4T contains custom code which must be executed to correctly load the model. You can inspect the repository content at https://hf.co/microsoft/bitnet-b1.58-2B-4T.
You can avoid this prompt in future by passing the argument `trust_remote_code=True`.
Do you wish to run the custom code? [y/N] y
Could not locate the configuration_bitnet.py inside microsoft/bitnet-b1.58-2B-4T.
---------------------------------------------------------------------------
HTTPError Traceback (most recent call last)
File ~/.local/lib/python3.10/site-packages/huggingface_hub/utils/_http.py:406, in hf_raise_for_status(response, endpoint_name)
405 try:
--> 406 response.raise_for_status()
407 except HTTPError as e:
File ~/.local/lib/python3.10/site-packages/requests/models.py:1024, in Response.raise_for_status(self)
1023 if http_error_msg:
-> 1024 raise HTTPError(http_error_msg, response=self)
HTTPError: 404 Client Error: Not Found for url: https://huggingface.co/microsoft/bitnet-b1.58-2B-4T/resolve/main/configuration_bitnet.py
The above exception was the direct cause of the following exception:
EntryNotFoundError Traceback (most recent call last)
File ~/.local/lib/python3.10/site-packages/transformers/utils/hub.py:403, in cached_file(path_or_repo_id, filename, cache_dir, force_download, resume_download, proxies, token, revision, local_files_only, subfolder, repo_type, user_agent, _raise_exceptions_for_gated_repo, _raise_exceptions_for_missing_entries, _raise_exceptions_for_connection_errors, _commit_hash, **deprecated_kwargs)
401 try:
402 # Load from URL or cache if already cached
--> 403 resolved_file = hf_hub_download(
404 path_or_repo_id,
405 filename,
406 subfolder=None if len(subfolder) == 0 else subfolder,
407 repo_type=repo_type,
408 revision=revision,
409 cache_dir=cache_dir,
410 user_agent=user_agent,
411 force_download=force_download,
412 proxies=proxies,
413 resume_download=resume_download,
414 token=token,
415 local_files_only=local_files_only,
416 )
417 except GatedRepoError as e:
File ~/.local/lib/python3.10/site-packages/huggingface_hub/utils/_validators.py:114, in validate_hf_hub_args.<locals>._inner_fn(*args, **kwargs)
112 kwargs = smoothly_deprecate_use_auth_token(fn_name=fn.__name__, has_token=has_token, kwargs=kwargs)
--> 114 return fn(*args, **kwargs)
File ~/.local/lib/python3.10/site-packages/huggingface_hub/file_download.py:860, in hf_hub_download(repo_id, filename, subfolder, repo_type, revision, library_name, library_version, cache_dir, local_dir, user_agent, force_download, proxies, etag_timeout, token, local_files_only, headers, endpoint, resume_download, force_filename, local_dir_use_symlinks)
859 else:
--> 860 return _hf_hub_download_to_cache_dir(
861 # Destination
862 cache_dir=cache_dir,
863 # File info
864 repo_id=repo_id,
865 filename=filename,
866 repo_type=repo_type,
867 revision=revision,
868 # HTTP info
869 endpoint=endpoint,
870 etag_timeout=etag_timeout,
871 headers=hf_headers,
872 proxies=proxies,
873 token=token,
874 # Additional options
875 local_files_only=local_files_only,
876 force_download=force_download,
877 )
File ~/.local/lib/python3.10/site-packages/huggingface_hub/file_download.py:923, in _hf_hub_download_to_cache_dir(cache_dir, repo_id, filename, repo_type, revision, endpoint, etag_timeout, headers, proxies, token, local_files_only, force_download)
921 # Try to get metadata (etag, commit_hash, url, size) from the server.
922 # If we can't, a HEAD request error is returned.
--> 923 (url_to_download, etag, commit_hash, expected_size, head_call_error) = _get_metadata_or_catch_error(
924 repo_id=repo_id,
925 filename=filename,
926 repo_type=repo_type,
927 revision=revision,
928 endpoint=endpoint,
929 proxies=proxies,
930 etag_timeout=etag_timeout,
931 headers=headers,
932 token=token,
933 local_files_only=local_files_only,
934 storage_folder=storage_folder,
935 relative_filename=relative_filename,
936 )
938 # etag can be None for several reasons:
939 # 1. we passed local_files_only.
940 # 2. we don't have a connection
(...)
946 # If the specified revision is a commit hash, look inside "snapshots".
947 # If the specified revision is a branch or tag, look inside "refs".
File ~/.local/lib/python3.10/site-packages/huggingface_hub/file_download.py:1374, in _get_metadata_or_catch_error(repo_id, filename, repo_type, revision, endpoint, proxies, etag_timeout, headers, token, local_files_only, relative_filename, storage_folder)
1373 try:
-> 1374 metadata = get_hf_file_metadata(
1375 url=url, proxies=proxies, timeout=etag_timeout, headers=headers, token=token
1376 )
1377 except EntryNotFoundError as http_error:
File ~/.local/lib/python3.10/site-packages/huggingface_hub/utils/_validators.py:114, in validate_hf_hub_args.<locals>._inner_fn(*args, **kwargs)
112 kwargs = smoothly_deprecate_use_auth_token(fn_name=fn.__name__, has_token=has_token, kwargs=kwargs)
--> 114 return fn(*args, **kwargs)
File ~/.local/lib/python3.10/site-packages/huggingface_hub/file_download.py:1294, in get_hf_file_metadata(url, token, proxies, timeout, library_name, library_version, user_agent, headers)
1293 # Retrieve metadata
-> 1294 r = _request_wrapper(
1295 method="HEAD",
1296 url=url,
1297 headers=hf_headers,
1298 allow_redirects=False,
1299 follow_relative_redirects=True,
1300 proxies=proxies,
1301 timeout=timeout,
1302 )
1303 hf_raise_for_status(r)
File ~/.local/lib/python3.10/site-packages/huggingface_hub/file_download.py:278, in _request_wrapper(method, url, follow_relative_redirects, **params)
277 if follow_relative_redirects:
--> 278 response = _request_wrapper(
279 method=method,
280 url=url,
281 follow_relative_redirects=False,
282 **params,
283 )
285 # If redirection, we redirect only relative paths.
286 # This is useful in case of a renamed repository.
File ~/.local/lib/python3.10/site-packages/huggingface_hub/file_download.py:302, in _request_wrapper(method, url, follow_relative_redirects, **params)
301 response = get_session().request(method=method, url=url, **params)
--> 302 hf_raise_for_status(response)
303 return response
File ~/.local/lib/python3.10/site-packages/huggingface_hub/utils/_http.py:417, in hf_raise_for_status(response, endpoint_name)
416 message = f"{response.status_code} Client Error." + "\n\n" + f"Entry Not Found for url: {response.url}."
--> 417 raise _format(EntryNotFoundError, message, response) from e
419 elif error_code == "GatedRepo":
EntryNotFoundError: 404 Client Error. (Request ID: Root=1-68002e3d-74d54501795be5050e6b1d2b;0cd075ce-27da-4af9-aca9-59176cd70c28)
Entry Not Found for url: https://huggingface.co/microsoft/bitnet-b1.58-2B-4T/resolve/main/configuration_bitnet.py.
The above exception was the direct cause of the following exception:
OSError Traceback (most recent call last)
Cell In[8], line 8
6 # Load tokenizer and model
7 tokenizer = AutoTokenizer.from_pretrained(model_id)
----> 8 model = AutoModelForCausalLM.from_pretrained(
9 model_id,
10 torch_dtype=torch.bfloat16,
11 # trust_remote_code=True,
12 )
14 # Apply the chat template
15 messages = [
16 {"role": "system", "content": "You are a helpful AI assistant."},
17 {"role": "user", "content": "How are you?"},
18 ]
File ~/.local/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py:526, in _BaseAutoModelClass.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
523 if kwargs.get("quantization_config", None) is not None:
524 _ = kwargs.pop("quantization_config")
--> 526 config, kwargs = AutoConfig.from_pretrained(
527 pretrained_model_name_or_path,
528 return_unused_kwargs=True,
529 trust_remote_code=trust_remote_code,
530 code_revision=code_revision,
531 _commit_hash=commit_hash,
532 **hub_kwargs,
533 **kwargs,
534 )
536 # if torch_dtype=auto was passed here, ensure to pass it on
537 if kwargs_orig.get("torch_dtype", None) == "auto":
File ~/.local/lib/python3.10/site-packages/transformers/models/auto/configuration_auto.py:1063, in AutoConfig.from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
1061 if has_remote_code and trust_remote_code:
1062 class_ref = config_dict["auto_map"]["AutoConfig"]
-> 1063 config_class = get_class_from_dynamic_module(
1064 class_ref, pretrained_model_name_or_path, code_revision=code_revision, **kwargs
1065 )
1066 if os.path.isdir(pretrained_model_name_or_path):
1067 config_class.register_for_auto_class()
File ~/.local/lib/python3.10/site-packages/transformers/dynamic_module_utils.py:541, in get_class_from_dynamic_module(class_reference, pretrained_model_name_or_path, cache_dir, force_download, resume_download, proxies, token, revision, local_files_only, repo_type, code_revision, **kwargs)
539 code_revision = revision
540 # And lastly we get the class inside our newly created module
--> 541 final_module = get_cached_module_file(
542 repo_id,
543 module_file + ".py",
544 cache_dir=cache_dir,
545 force_download=force_download,
546 resume_download=resume_download,
547 proxies=proxies,
548 token=token,
549 revision=code_revision,
550 local_files_only=local_files_only,
551 repo_type=repo_type,
552 )
553 return get_class_in_module(class_name, final_module, force_reload=force_download)
File ~/.local/lib/python3.10/site-packages/transformers/dynamic_module_utils.py:345, in get_cached_module_file(pretrained_model_name_or_path, module_file, cache_dir, force_download, resume_download, proxies, token, revision, local_files_only, repo_type, _commit_hash, **deprecated_kwargs)
342 new_files = []
343 try:
344 # Load from URL or cache if already cached
--> 345 resolved_module_file = cached_file(
346 pretrained_model_name_or_path,
347 module_file,
348 cache_dir=cache_dir,
349 force_download=force_download,
350 proxies=proxies,
351 resume_download=resume_download,
352 local_files_only=local_files_only,
353 token=token,
354 revision=revision,
355 repo_type=repo_type,
356 _commit_hash=_commit_hash,
357 )
358 if not is_local and cached_module != resolved_module_file:
359 new_files.append(module_file)
File ~/.local/lib/python3.10/site-packages/transformers/utils/hub.py:459, in cached_file(path_or_repo_id, filename, cache_dir, force_download, resume_download, proxies, token, revision, local_files_only, subfolder, repo_type, user_agent, _raise_exceptions_for_gated_repo, _raise_exceptions_for_missing_entries, _raise_exceptions_for_connection_errors, _commit_hash, **deprecated_kwargs)
457 if filename in ["config.json", f"{subfolder}/config.json"]:
458 return None
--> 459 raise EnvironmentError(
460 f"{path_or_repo_id} does not appear to have a file named {full_filename}. Checkout "
461 f"'https://huggingface.co/{path_or_repo_id}/tree/{revision}' for available files."
462 ) from e
463 except HTTPError as err:
464 resolved_file = _get_cache_file_to_return(path_or_repo_id, full_filename, cache_dir, revision)
OSError: microsoft/bitnet-b1.58-2B-4T does not appear to have a file named configuration_bitnet.py. Checkout 'https://huggingface.co/microsoft/bitnet-b1.58-2B-4T/tree/main' for available files.