
Commit 1a1f972

CHORE Replace deprecated torch_dtype with dtype (#2837)
Note: Diffusers is left as is for now, might need an update later.
1 parent 87b90f0 commit 1a1f972
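Most of the diff is a mechanical keyword rename in calls to `from_pretrained`. A hedged before/after sketch of the rename (the model name is illustrative, and it assumes a transformers release recent enough to accept `dtype`):

```python
import torch
from transformers import AutoModelForCausalLM

# Before: the deprecated keyword
# model = AutoModelForCausalLM.from_pretrained("gpt2", torch_dtype=torch.bfloat16)

# After: the keyword this commit switches to
model = AutoModelForCausalLM.from_pretrained("gpt2", dtype=torch.bfloat16)
```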

54 files changed (+209, -215 lines)


docs/source/accelerate/deepspeed.md

Lines changed: 2 additions & 2 deletions
@@ -263,11 +263,11 @@ model = AutoModelForCausalLM.from_pretrained(
     quantization_config=bnb_config,
     trust_remote_code=True,
     attn_implementation="flash_attention_2" if args.use_flash_attn else "eager",
-    torch_dtype=quant_storage_dtype or torch.float32,
+    dtype=quant_storage_dtype or torch.float32,
 )
 ```
 
-Notice that `torch_dtype` for `AutoModelForCausalLM` is same as the `bnb_4bit_quant_storage` data type. That's it. Everything else is handled by Trainer and TRL.
+Notice that `dtype` for `AutoModelForCausalLM` is same as the `bnb_4bit_quant_storage` data type. That's it. Everything else is handled by Trainer and TRL.
 
 ## Memory usage
 

docs/source/accelerate/fsdp.md

Lines changed: 2 additions & 2 deletions
@@ -264,11 +264,11 @@ model = AutoModelForCausalLM.from_pretrained(
     quantization_config=bnb_config,
     trust_remote_code=True,
     attn_implementation="flash_attention_2" if args.use_flash_attn else "eager",
-    torch_dtype=quant_storage_dtype or torch.float32,
+    dtype=quant_storage_dtype or torch.float32,
 )
 ```
 
-Notice that `torch_dtype` for `AutoModelForCausalLM` is same as the `bnb_4bit_quant_storage` data type. That's it. Everything else is handled by Trainer and TRL.
+Notice that `dtype` for `AutoModelForCausalLM` is same as the `bnb_4bit_quant_storage` data type. That's it. Everything else is handled by Trainer and TRL.
 
 ## Memory usage
 

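The sentence changed in both the DeepSpeed and FSDP guides above is about keeping the 4-bit quantization storage dtype and the `dtype` passed to `from_pretrained` in sync, so that quantized and non-quantized parameters end up with a uniform storage dtype when the model is wrapped for sharded training. A hedged sketch of that pairing (model name and dtypes are illustrative):

```python
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

quant_storage_dtype = torch.bfloat16  # illustrative choice

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_storage=quant_storage_dtype,  # storage dtype of the packed 4-bit weights
)

model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",       # illustrative model
    quantization_config=bnb_config,
    dtype=quant_storage_dtype,        # keep in sync with bnb_4bit_quant_storage
)
```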
docs/source/developer_guides/lora.md

Lines changed: 2 additions & 2 deletions
@@ -539,7 +539,7 @@ from peft import PeftModel
 import torch
 
 base_model = AutoModelForCausalLM.from_pretrained(
-    "mistralai/Mistral-7B-v0.1", torch_dtype=torch.float16, device_map="auto"
+    "mistralai/Mistral-7B-v0.1", dtype=torch.float16, device_map="auto"
 )
 ```
 
@@ -813,7 +813,7 @@ To encode general knowledge, GenKnowSub subtracts the average of the provided ge
 > # Loading the model
 > base_model = AutoModelForCausalLM.from_pretrained(
 >     "microsoft/Phi-3-mini-4k-instruct",
->     torch_dtype=torch.bfloat16,
+>     dtype=torch.bfloat16,
 >     device_map="auto",
 >     quantization_config=bnb_config,
 > )

docs/source/developer_guides/quantization.md

Lines changed: 1 addition & 1 deletion
@@ -144,7 +144,7 @@ The models support LoRA adapter tuning. To tune the quantized model you'll need
 ```py
 quantized_model = AutoModelForCausalLM.from_pretrained(
     "BlackSamorez/Mixtral-8x7b-AQLM-2Bit-1x16-hf-test-dispatch",
-    torch_dtype="auto", device_map="auto", low_cpu_mem_usage=True,
+    dtype="auto", device_map="auto", low_cpu_mem_usage=True,
 )
 
 peft_config = LoraConfig(...)

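To round out the AQLM snippet above, here is a sketch of what attaching a LoRA adapter to the quantized model could look like. The LoRA hyperparameters and target modules are illustrative, not taken from the doc, and `dtype="auto"` assumes a transformers version that accepts the renamed keyword:

```python
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model

quantized_model = AutoModelForCausalLM.from_pretrained(
    "BlackSamorez/Mixtral-8x7b-AQLM-2Bit-1x16-hf-test-dispatch",
    dtype="auto", device_map="auto", low_cpu_mem_usage=True,
)

# Illustrative LoRA settings; pick target modules that match the architecture.
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["q_proj", "v_proj"],
)
quantized_model = get_peft_model(quantized_model, peft_config)
quantized_model.print_trainable_parameters()
```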
docs/source/developer_guides/troubleshooting.md

Lines changed: 2 additions & 2 deletions
@@ -43,7 +43,7 @@ python -m pip install git+https://github.com/huggingface/peft
 
 ### ValueError: Attempting to unscale FP16 gradients
 
-This error probably occurred because the model was loaded with `torch_dtype=torch.float16` and then used in an automatic mixed precision (AMP) context, e.g. by setting `fp16=True` in the [`~transformers.Trainer`] class from 🤗 Transformers. The reason is that when using AMP, trainable weights should never use fp16. To make this work without loading the whole model in fp32, add the following to your code:
+This error probably occurred because the model was loaded with `dtype=torch.float16` and then used in an automatic mixed precision (AMP) context, e.g. by setting `fp16=True` in the [`~transformers.Trainer`] class from 🤗 Transformers. The reason is that when using AMP, trainable weights should never use fp16. To make this work without loading the whole model in fp32, add the following to your code:
 
 ```python
 peft_model = get_peft_model(...)
@@ -294,7 +294,7 @@ It is possible to get this information for non-PEFT models if they are using PEF
 
 >>> path = "runwayml/stable-diffusion-v1-5"
 >>> lora_id = "takuma104/lora-test-text-encoder-lora-target"
->>> pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float16)
+>>> pipe = StableDiffusionPipeline.from_pretrained(path, dtype=torch.float16)
 >>> pipe.load_lora_weights(lora_id, adapter_name="adapter-1")
 >>> pipe.load_lora_weights(lora_id, adapter_name="adapter-2")
 >>> pipe.set_lora_device(["adapter-2"], "cuda")

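The troubleshooting snippet above is cut off at the hunk boundary (`peft_model = get_peft_model(...)`). The fix it introduces is, roughly, to upcast only the trainable adapter parameters to fp32 while the frozen base weights stay in fp16. A minimal sketch of that idea, not the doc's exact continuation:

```python
import torch
from peft import get_peft_model

peft_model = get_peft_model(model, peft_config)  # model and peft_config defined elsewhere

# Upcast trainable parameters so AMP's GradScaler never has to unscale fp16 gradients.
for param in peft_model.parameters():
    if param.requires_grad:
        param.data = param.data.to(torch.float32)
```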
examples/arrow_multitask/arrow_phi3_mini.py

Lines changed: 1 addition & 1 deletion
@@ -303,7 +303,7 @@ def evaluate_on_multi_choice_batched(
 # Loading the model
 base_model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
-    torch_dtype=torch.bfloat16,
+    dtype=torch.bfloat16,
     device_map="auto",
     quantization_config=bnb_config,
 )

examples/boft_controlnet/test_controlnet.py

Lines changed: 1 addition & 1 deletion
@@ -84,7 +84,7 @@ def main(args):
         args.pretrained_model_name_or_path,
         controlnet=controlnet,
         unet=unet.model,
-        torch_dtype=torch.float32,
+        dtype=torch.float32,
         requires_safety_checker=False,
     ).to(device)
 

examples/boft_dreambooth/train_dreambooth.py

Lines changed: 5 additions & 5 deletions
@@ -139,16 +139,16 @@ def main(args):
         cur_class_images = len(list(class_images_dir.iterdir()))
 
         if cur_class_images < args.num_class_images:
-            torch_dtype = torch.float16 if accelerator.device.type in ["cuda", "xpu"] else torch.float32
+            dtype = torch.float16 if accelerator.device.type in ["cuda", "xpu"] else torch.float32
             if args.prior_generation_precision == "fp32":
-                torch_dtype = torch.float32
+                dtype = torch.float32
             elif args.prior_generation_precision == "fp16":
-                torch_dtype = torch.float16
+                dtype = torch.float16
             elif args.prior_generation_precision == "bf16":
-                torch_dtype = torch.bfloat16
+                dtype = torch.bfloat16
             pipeline = DiffusionPipeline.from_pretrained(
                 args.pretrained_model_name_or_path,
-                torch_dtype=torch_dtype,
+                dtype=dtype,
                 safety_checker=None,
                 revision=args.revision,
             )

examples/bone_finetuning/README.md

Lines changed: 2 additions & 2 deletions
@@ -11,7 +11,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 from trl import SFTConfig, SFTTrainer
 from datasets import load_dataset
 
-model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", torch_dtype=torch.bfloat16, device_map="auto")
+model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", dtype=torch.bfloat16, device_map="auto")
 tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
 tokenizer.pad_token_id = tokenizer.eos_token_id
 bone_config = BoneConfig(
@@ -47,7 +47,7 @@ from peft import PeftModel
 from transformers import AutoModelForCausalLM
 
 model = AutoModelForCausalLM.from_pretrained(
-    "meta-llama/Llama-2-7b-hf", torch_dtype=torch.bfloat16, device_map="auto"
+    "meta-llama/Llama-2-7b-hf", dtype=torch.bfloat16, device_map="auto"
 )
 peft_model = PeftModel.from_pretrained(model, "bone-llama-2-7b")
 ```

examples/bone_finetuning/bone_finetuning.py

Lines changed: 1 addition & 1 deletion
@@ -57,7 +57,7 @@ class ScriptArguments(SFTConfig):
     print(f"No available pre-processed model, manually initialize a Bone using {script_args.base_model_name_or_path}.")
     model = AutoModelForCausalLM.from_pretrained(
         script_args.base_model_name_or_path,
-        torch_dtype=(
+        dtype=(
             torch.float16
             if script_args.bits == "fp16"
             else (torch.bfloat16 if script_args.bits == "bf16" else torch.float32)
