[CI Sprint] Quantization CI Cleanup (#24130) #2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Smoke-test workflow: builds vLLM on a macOS runner and checks that the server boots. | |
| name: macOS Apple Silicon Smoke Test | |
| # Runs on every push to main and can also be started by hand. | |
| on: | |
| push: | |
| branches: | |
| - main | |
| workflow_dispatch: # Manual trigger | |
| jobs: | |
| macos-m1-smoke-test: | |
| # NOTE(review): the workflow/job names say Apple Silicon / M1, but the label here is the generic macos-latest; confirm the runner architecture it resolves to. | |
| runs-on: macos-latest | |
| # Whole-job limit; the smoke-test step further down sets its own 10-minute limit. | |
| timeout-minutes: 20 | |
| steps: | |
| # Check out the repository so the requirements files and sources are available. | |
| - uses: actions/checkout@v4 | |
| # Install uv with caching turned on and Python 3.12 requested. | |
| # NOTE(review): cache-dependency-glob presumably selects the files whose changes invalidate the cache; confirm against the setup-uv documentation. | |
| - uses: astral-sh/setup-uv@v7 | |
| with: | |
| enable-cache: true | |
| cache-dependency-glob: | | |
| requirements/**/*.txt | |
| pyproject.toml | |
| python-version: '3.12' | |
| - name: Create virtual environment | |
| run: | | |
| # Create the uv-managed virtual environment in the workspace | |
| uv venv | |
| # Record the venv's bin directory in the GITHUB_PATH file so later steps find python and vllm there | |
| printf '%s\n' "${GITHUB_WORKSPACE}/.venv/bin" >> "${GITHUB_PATH}" | |
| - name: Install dependencies and build vLLM | |
| run: | | |
| # Install the CPU requirement set first, then the checkout itself in editable mode | |
| # NOTE(review): --index-strategy unsafe-best-match is a uv resolver option; confirm it is still required for requirements/cpu.txt | |
| # NOTE(review): CMAKE_BUILD_PARALLEL_LEVEL=4 (set in env below) presumably caps parallel build jobs during the editable install; confirm | |
| uv pip install -r requirements/cpu.txt --index-strategy unsafe-best-match | |
| uv pip install -e . | |
| env: | |
| CMAKE_BUILD_PARALLEL_LEVEL: 4 | |
| - name: Verify installation | |
| run: | | |
| # Import checks: print the installed vLLM and PyTorch versions; a failed import makes python exit non-zero | |
| python -c "import vllm; print(f'vLLM version: {vllm.__version__}')" | |
| python -c "import torch; print(f'PyTorch: {torch.__version__}')" | |
| - name: Smoke test vllm serve | |
| timeout-minutes: 10 | |
| run: | | |
| # Boots vllm serve with dummy weights, waits for /health to answer, | |
| # then sends one completion request. curl -f exits non-zero on HTTP errors. | |
| # Start server in background | |
| vllm serve Qwen/Qwen3-0.6B \ | |
| --max-model-len=2048 \ | |
| --load-format=dummy \ | |
| --enforce-eager \ | |
| --port 8000 & | |
| SERVER_PID=$! | |
| # Stop the server on every exit path; without this a failed curl below | |
| # would abort the step and leave the server running | |
| trap 'kill "$SERVER_PID" 2>/dev/null || true' EXIT | |
| # Wait for server to start (up to 30 probes, 2 seconds apart) | |
| for i in {1..30}; do | |
| # -f makes an HTTP error status count as "not ready" instead of success | |
| if curl -sf http://localhost:8000/health > /dev/null; then | |
| echo "Server started successfully" | |
| break | |
| fi | |
| # Stop polling early if the server process has already died | |
| if ! kill -0 "$SERVER_PID" 2>/dev/null; then | |
| echo "Server exited before becoming healthy" | |
| exit 1 | |
| fi | |
| if [ "$i" -eq 30 ]; then | |
| echo "Server failed to start" | |
| exit 1 | |
| fi | |
| sleep 2 | |
| done | |
| # Test health endpoint | |
| curl -f http://localhost:8000/health | |
| # Test completion | |
| curl -f http://localhost:8000/v1/completions \ | |
| -H "Content-Type: application/json" \ | |
| -d '{ | |
| "model": "Qwen/Qwen3-0.6B", | |
| "prompt": "Hello", | |
| "max_tokens": 5 | |
| }' | |
| # Cleanup: the EXIT trap registered after startup kills the server | |