ValueError Traceback (most recent call last)
Cell In[32], line 21
1 """
2 model: the model
3
(...)
19 - Allow remote code execution (e.g., for custom models)
20 """
---> 21 model = AutoModelForCausalLM.from_pretrained(
22 base_model_id,
23 quantization_config=bnb_config,
24 device_map="auto"
25 )
27 tokenizer = AutoTokenizer.from_pretrained(base_model_id, trust_remote_code=True)
File /usr/local/lib/python3.10/dist-packages/transformers/models/auto/auto_factory.py:564, in _BaseAutoModelClass.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
562 elif type(config) in cls._model_mapping.keys():
563 model_class = _get_model_class(config, cls._model_mapping)
--> 564 return model_class.from_pretrained(
565 pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs
566 )
567 raise ValueError(
568 f"Unrecognized configuration class {config.class} for this kind of AutoModel: {cls.name}.\n"
569 f"Model type should be one of {', '.join(c.name for c in cls._model_mapping.keys())}."
570 )
File /usr/local/lib/python3.10/dist-packages/transformers/modeling_utils.py:4207, in PreTrainedModel.from_pretrained(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, weights_only, *model_args, **kwargs)
4204 device_map = infer_auto_device_map(model, dtype=target_dtype, **device_map_kwargs)
4206 if hf_quantizer is not None:
-> 4207 hf_quantizer.validate_environment(device_map=device_map)
4209 elif device_map is not None:
4210 model.tie_weights()
File /usr/local/lib/python3.10/dist-packages/transformers/quantizers/quantizer_bnb_4bit.py:102, in Bnb4BitHfQuantizer.validate_environment(self, *args, **kwargs)
100 pass
101 elif "cpu" in device_map_without_lm_head.values() or "disk" in device_map_without_lm_head.values():
--> 102 raise ValueError(
103 "Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the "
104 "quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules "
105 "in 32-bit, you need to set llm_int8_enable_fp32_cpu_offload=True
and pass a custom device_map
to "
106 "from_pretrained
. Check "
107 "https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu "
108 "for more details. "
109 )
111 if version.parse(importlib.metadata.version("bitsandbytes")) < version.parse("0.39.0"):
112 raise ValueError(
113 "You have a version of bitsandbytes
that is not compatible with 4bit inference and training"
114 " make sure you have the latest version of bitsandbytes
installed"
115 )
ValueError: Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules in 32-bit, you need to set `llm_int8_enable_fp32_cpu_offload=True` and pass a custom `device_map` to `from_pretrained`. Check https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu for more details.
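
The error means `device_map="auto"` spilled part of the model onto the CPU or disk, which the 4-bit quantizer rejects by default. Below is a minimal sketch of the fix the message suggests, assuming `base_model_id` is defined as in the failing cell; the 4-bit settings and the module names in the custom `device_map` are illustrative and vary per architecture:

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Allow modules that do not fit on the GPU to stay on the CPU in 32-bit,
# as the error message suggests. The 4-bit settings here are assumed for
# illustration; keep whatever bnb_config the notebook already uses.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    llm_int8_enable_fp32_cpu_offload=True,
)

# A custom device_map instead of "auto": keep the transformer blocks on
# GPU 0 and push the lm_head to the CPU. These module names are
# illustrative; inspect model.named_modules() for your model.
device_map = {
    "model.embed_tokens": 0,
    "model.layers": 0,
    "model.norm": 0,
    "lm_head": "cpu",
}

model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    quantization_config=bnb_config,
    device_map=device_map,
)

If the GPU does in fact have enough memory, `device_map={"": 0}` forces the whole model onto GPU 0 and avoids offloading entirely.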
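
Separately, the frame above (lines 111-115 of quantizer_bnb_4bit.py) shows that 4-bit loading also requires bitsandbytes >= 0.39.0. A quick sanity check mirroring that guard:

import importlib.metadata
from packaging import version

# 4-bit inference/training needs bitsandbytes >= 0.39.0
# (upgrade with: pip install -U bitsandbytes).
installed = version.parse(importlib.metadata.version("bitsandbytes"))
assert installed >= version.parse("0.39.0"), f"bitsandbytes {installed} is too old for 4-bit"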