Merge branch 'main' into patch-43

mikekgfb · web-flow · commit 6e51132f529f · 2025-02-18T10:06:49.000-08:00
diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
@@ -291,6 +291,16 @@ jobs:
         bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-bfloat16"
         echo "::endgroup::"
 
+        echo "::group::Run inference with quantize file"
+        for DEVICE in cpu; do # TODO: restore `cuda` to this list once the failure described below is fixed
+          # cuda currently fails with `AttributeError: 'Linear' object has no attribute '_linear_extra_repr'`;
+          # to be followed up with torchao in a separate PR.
+          echo "saving snapshot for device ${DEVICE} and dtype bfloat16, and reloading as snapshot"
+          python3 torchchat.py export --device ${DEVICE} --output-snap model.tc --dtype bfloat16 --quantize torchchat/quant_config/cuda-32.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+          python3 torchchat.py generate --device ${DEVICE} --snap model.tc --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+        done
+        echo "::endgroup::"
+
   test-gpu-aoti-float32:
     permissions:
       id-token: write
@@ -335,6 +345,11 @@ jobs:
         fi
         echo "::endgroup::"
 
+        # echo "::group::Run inference with quantize file"
+        # python3 torchchat.py export --output-snap model.tc --dtype float32 --quantize torchchat/quant_config/cuda-32.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+        # python3 torchchat.py generate --snap model.tc --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+        # echo "::endgroup::"
+        
   test-gpu-aoti-float16:
     permissions:
       id-token: write
@@ -376,10 +391,15 @@ jobs:
         echo "::group::Run inference with quantize file"
         if [ $(uname -s) == Darwin ]; then
           python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
-             python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"~
+          python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
         fi
         echo "::endgroup::"
 
+        # echo "::group::Run inference with quantize file"
+        # python3 torchchat.py export --output-snap model.tc --dtype float16 --quantize torchchat/quant_config/cuda-32.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+        # python3 torchchat.py generate --snap model.tc --dtype float16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+        # echo "::endgroup::"
+
   test-gpu-eval-sanity-check:
     permissions:
       id-token: write
@@ -495,10 +515,11 @@ jobs:
           python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
 
           echo "******************************************"
-          echo "*** --quantize torchchat/quant_config/mobile.json ***"
+          echo "*** can't test --quantize torchchat/quant_config/mobile.json ***"
+          echo "*** testing --quantize torchchat/quant_config/mobile-32.json ***"
           echo "******************************************"
-          # python torchchat.py export --quantize torchchat/quant_config/mobile.json --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
-          # python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
+          python3 torchchat.py export --quantize torchchat/quant_config/mobile-32.json --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
+          python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
 
 
           echo "******************************************"
@@ -1055,7 +1076,59 @@ jobs:
           ./runner/build_android.sh
           echo "Tests complete."
 
-  test-torchao-experimental:
+  test-torchao-aoti-experimental:
+    strategy:
+      matrix:
+        runner: [macos-14-xlarge]
+    runs-on: ${{matrix.runner}}
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v3
+        with:
+          submodules: true
+      - name: Setup Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.10.11
+      - name: Setup Xcode
+        if: runner.os == 'macOS'
+        uses: maxim-lobanov/setup-xcode@v1
+        with:
+          xcode-version: '15.3'
+      - name: Print machine info
+        run: |
+          uname -a
+          if [ $(uname -s) == Darwin ]; then
+            sysctl machdep.cpu.brand_string
+            sysctl machdep.cpu.core_count
+          fi
+      - name: Install torchchat
+        run: |
+          echo "Installing pip3 packages"
+          ./install/install_requirements.sh
+          pip3 list
+          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
+      - name: Install torchao-ops
+        id: install-torchao-ops
+        run: |
+          bash torchchat/utils/scripts/build_torchao_ops.sh
+      - name: Install runner AOTI
+        id: install-runner-aoti
+        run: |
+          bash torchchat/utils/scripts/build_native.sh aoti link_torchao_ops
+      - name: Run inference
+        run: |
+          python torchchat.py download stories110M
+          wget -O ./tokenizer.model https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
+          export PRMT="Once upon a time in a land far away"
+          echo "Export and run AOTI (C++ runner)"
+          python torchchat.py export stories110M --output-aoti-package-path ./model.pt2 --dtype float32 --quantize '{"embedding:wx": {"bitwidth": 2, "groupsize": 32}, "linear:a8wxdq": {"bitwidth": 3, "groupsize": 128, "has_weight_zeros": false}}'
+          ./cmake-out/aoti_run ./model.pt2 -z ./tokenizer.model -t 0 -i "${PRMT}"
+          echo "Generate AOTI"
+          python torchchat.py generate stories110M --aoti-package-path ./model.pt2 --prompt "${PRMT}"
+          echo "Tests complete."
+
+  test-torchao-et-experimental:
     strategy:
       matrix:
         runner: [macos-14-xlarge]
@@ -1100,10 +1173,6 @@ jobs:
         run: |
           echo "Installing runner"
           bash torchchat/utils/scripts/build_native.sh et link_torchao_ops
-      - name: Install runner AOTI
-        id: install-runner-aoti
-        run: |
-          bash torchchat/utils/scripts/build_native.sh aoti link_torchao_ops
       - name: Run inference
         run: |
           python torchchat.py download stories110M
@@ -1116,11 +1185,6 @@ jobs:
           echo "Export and run ET (C++ runner)"
           python torchchat.py export stories110M --output-pte-path ./model.pte --dtype float32 --quantize '{"embedding:wx": {"bitwidth": 2, "groupsize": 32}, "linear:a8wxdq": {"bitwidth": 3, "groupsize": 128, "has_weight_zeros": false}}'
           ./cmake-out/et_run ./model.pte -z ./tokenizer.model -t 0 -i "${PRMT}"
-          echo "Export and run AOTI (C++ runner)"
-          python torchchat.py export stories110M --output-aoti-package-path ./model.pt2 --dtype float32 --quantize '{"embedding:wx": {"bitwidth": 2, "groupsize": 32}, "linear:a8wxdq": {"bitwidth": 3, "groupsize": 128, "has_weight_zeros": false}}'
-          ./cmake-out/aoti_run ./model.pt2 -z ./tokenizer.model -t 0 -i "${PRMT}"
-          echo "Generate AOTI"
-          python torchchat.py generate stories110M --aoti-package-path ./model.pt2 --prompt "${PRMT}"
           echo "Tests complete."
 
   test-torchao-experimental-mps:
diff --git a/install/.pins/et-pin.txt b/install/.pins/et-pin.txt
@@ -1 +1 @@
-9c043290ad3944268290e015c3063bc411e6ef6b
+791472d6706b027552f39f11b28d034e4839c9af
diff --git a/install/install_requirements.sh b/install/install_requirements.sh
@@ -51,13 +51,13 @@ echo "Using pip executable: $PIP_EXECUTABLE"
 # NOTE: If a newly-fetched version of the executorch repo changes the value of
 # PYTORCH_NIGHTLY_VERSION, you should re-run this script to install the necessary
 # package versions.
-PYTORCH_NIGHTLY_VERSION=dev20250124
+PYTORCH_NIGHTLY_VERSION=dev20250131
 
 # Nightly version for torchvision
-VISION_NIGHTLY_VERSION=dev20250124
+VISION_NIGHTLY_VERSION=dev20250131
 
 # Nightly version for torchtune
-TUNE_NIGHTLY_VERSION=dev20250124
+TUNE_NIGHTLY_VERSION=dev20250131
 
 # The pip repository that hosts nightly torch packages. cpu by default.
 # If cuda is available, based on presence of nvidia-smi, install the pytorch nightly
diff --git a/torchchat/utils/scripts/build_native.sh b/torchchat/utils/scripts/build_native.sh
@@ -86,6 +86,9 @@ if [[ "$TARGET" == "et" ]]; then
     EXECUTORCH_LIBRARIES="${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libexecutorch_no_prim_ops.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libextension_threadpool.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libcpuinfo.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libpthreadpool.a"
     install_torchao_executorch_ops
   fi
+elif [[ "$LINK_TORCHAO_OPS" == "ON" ]]; then
+  # Install OMP via Homebrew when using AOTI with linked torchao ops (skipped on hosts where brew is not available)
+  if command -v brew >/dev/null 2>&1; then brew install libomp; fi
 fi
 popd
 
diff --git a/torchchat/utils/scripts/install_utils.sh b/torchchat/utils/scripts/install_utils.sh
@@ -88,10 +88,10 @@ install_executorch_python_libs() {
   echo "Building and installing python libraries"
   if [ "${ENABLE_ET_PYBIND}" = false ]; then
       echo "Not installing pybind"
-      bash ./install_requirements.sh --pybind off
+      bash ./install_executorch.sh --pybind off
   else
       echo "Installing pybind"
-      bash ./install_requirements.sh --pybind xnnpack
+      bash ./install_executorch.sh --pybind xnnpack
   fi
 
   # TODO: figure out the root cause of 'AttributeError: module 'evaluate'

Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-9c043290ad3944268290e015c3063bc411e6ef6b`
	`1`	`+791472d6706b027552f39f11b28d034e4839c9af`