@@ -291,6 +291,16 @@ jobs:
291291 bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-bfloat16"
292292 echo "::endgroup::"
293293
294+ echo "::group::Run inference with quantize file"
295+ for DEVICE in cpu; do # cuda
296+ # cuda - fails because `AttributeError: 'Linear' object has no attribute '_linear_extra_repr'`
297+ # follow up with torchao as a separate PR
298+ echo "saving snapshot for device ${DEVICE} and dtype bfloat16, and reloading as snapshot"
299+ python3 torchchat.py export --device ${DEVICE} --output-snap model.tc --dtype bfloat16 --quantize torchchat/quant_config/cuda-32.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
300+ python3 torchchat.py generate --device ${DEVICE} --snap model.tc --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
301+ done
302+ echo "::endgroup::"
303+
294304 test-gpu-aoti-float32 :
295305 permissions :
296306 id-token : write
@@ -335,6 +345,11 @@ jobs:
335345 fi
336346 echo "::endgroup::"
337347
348+ # echo "::group::Run inference with quantize file"
349+ # python3 torchchat.py export --output-snap model.tc --dtype float32 --quantize torchchat/quant_config/cuda-32.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
350+ # python3 torchchat.py generate --snap model.tc --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
351+ # echo "::endgroup::"
352+
338353 test-gpu-aoti-float16 :
339354 permissions :
340355 id-token : write
@@ -376,10 +391,15 @@ jobs:
376391 echo "::group::Run inference with quantize file"
377392 if [ $(uname -s) == Darwin ]; then
378393 python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
379- python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"~
394+ python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" # run the AOTI package exported above; stray "~" after the checkpoint path removed
380395 fi
381396 echo "::endgroup::"
382397
398+ # echo "::group::Run inference with quantize file"
399+ # python3 torchchat.py export --output-snap model.tc --dtype float16 --quantize torchchat/quant_config/cuda-32.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
400+ # python3 torchchat.py generate --snap model.tc --dtype float16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
401+ # echo "::endgroup::"
402+
383403 test-gpu-eval-sanity-check :
384404 permissions :
385405 id-token : write
@@ -495,10 +515,11 @@ jobs:
495515 python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
496516
497517 echo "******************************************"
498- echo "*** --quantize torchchat/quant_config/mobile.json ***"
518+ echo "*** can't test --quantize torchchat/quant_config/mobile.json ***"
519+ echo "*** testing --quantize torchchat/quant_config/mobile-32.json ***"
499520 echo "******************************************"
500- # python torchchat.py export --quantize torchchat/quant_config/mobile.json --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
501- # python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
521+ python torchchat.py export --quantize torchchat/quant_config/mobile-32.json --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte # config path must be a single word (no space before .json)
522+ python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
502523
503524
504525 echo "******************************************"
@@ -1055,7 +1076,59 @@ jobs:
10551076 ./runner/build_android.sh
10561077 echo "Tests complete."
10571078
1058- test-torchao-experimental :
1079+ test-torchao-aoti-experimental : # builds torchao ops + the AOTI runner, then exports and runs a quantized stories110M
1080+ strategy :
1081+ matrix :
1082+ runner : [macos-14-xlarge]
1083+ runs-on : ${{matrix.runner}}
1084+ steps :
1085+ - name : Checkout repo
1086+ uses : actions/checkout@v3
1087+ with :
1088+ submodules : true
1089+ - name : Setup Python
1090+ uses : actions/setup-python@v2
1091+ with :
1092+ python-version : 3.10.11
1093+ - name : Setup Xcode
1094+ if : runner.os == 'macOS'
1095+ uses : maxim-lobanov/setup-xcode@v1
1096+ with :
1097+ xcode-version : '15.3'
1098+ - name : Print machine info
1099+ run : |
1100+ uname -a
1101+ if [ $(uname -s) == Darwin ]; then
1102+ sysctl machdep.cpu.brand_string
1103+ sysctl machdep.cpu.core_count
1104+ fi
1105+ - name : Install torchchat
1106+ run : |
1107+ echo "Installing pip3 packages"
1108+ ./install/install_requirements.sh
1109+ pip3 list
1110+ python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
1111+ - name : Install torchao-ops
1112+ id : install-torchao-ops
1113+ run : |
1114+ bash torchchat/utils/scripts/build_torchao_ops.sh
1115+ - name : Install runner AOTI
1116+ id : install-runner-aoti
1117+ run : |
1118+ bash torchchat/utils/scripts/build_native.sh aoti link_torchao_ops
1119+ - name : Run inference
1120+ run : |
1121+ python torchchat.py download stories110M
1122+ wget -O ./tokenizer.model https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
1123+ export PRMT="Once upon a time in a land far away"
1124+ echo "Export and run AOTI (C++ runner)"
1125+ python torchchat.py export stories110M --output-aoti-package-path ./model.pt2 --dtype float32 --quantize '{"embedding:wx": {"bitwidth": 2, "groupsize": 32}, "linear:a8wxdq": {"bitwidth": 3, "groupsize": 128, "has_weight_zeros": false}}'
1126+ ./cmake-out/aoti_run ./model.pt2 -z ./tokenizer.model -t 0 -i "${PRMT}" # same package is exercised by the C++ runner here and by the Python generate below
1127+ echo "Generate AOTI"
1128+ python torchchat.py generate stories110M --aoti-package-path ./model.pt2 --prompt "${PRMT}"
1129+ echo "Tests complete."
1130+
1131+ test-torchao-et-experimental :
10591132 strategy :
10601133 matrix :
10611134 runner : [macos-14-xlarge]
@@ -1100,10 +1173,6 @@ jobs:
11001173 run : |
11011174 echo "Installing runner"
11021175 bash torchchat/utils/scripts/build_native.sh et link_torchao_ops
1103- - name : Install runner AOTI
1104- id : install-runner-aoti
1105- run : |
1106- bash torchchat/utils/scripts/build_native.sh aoti link_torchao_ops
11071176 - name : Run inference
11081177 run : |
11091178 python torchchat.py download stories110M
@@ -1116,11 +1185,6 @@ jobs:
11161185 echo "Export and run ET (C++ runner)"
11171186 python torchchat.py export stories110M --output-pte-path ./model.pte --dtype float32 --quantize '{"embedding:wx": {"bitwidth": 2, "groupsize": 32}, "linear:a8wxdq": {"bitwidth": 3, "groupsize": 128, "has_weight_zeros": false}}'
11181187 ./cmake-out/et_run ./model.pte -z ./tokenizer.model -t 0 -i "${PRMT}"
1119- echo "Export and run AOTI (C++ runner)"
1120- python torchchat.py export stories110M --output-aoti-package-path ./model.pt2 --dtype float32 --quantize '{"embedding:wx": {"bitwidth": 2, "groupsize": 32}, "linear:a8wxdq": {"bitwidth": 3, "groupsize": 128, "has_weight_zeros": false}}'
1121- ./cmake-out/aoti_run ./model.pt2 -z ./tokenizer.model -t 0 -i "${PRMT}"
1122- echo "Generate AOTI"
1123- python torchchat.py generate stories110M --aoti-package-path ./model.pt2 --prompt "${PRMT}"
11241188 echo "Tests complete."
11251189
11261190 test-torchao-experimental-mps :
0 commit comments