# [CI Sprint] Quantization CI Cleanup (#24130) #2

	name: macOS Apple Silicon Smoke Test

	on:
	push:
	branches:
	- main
	workflow_dispatch: # Manual trigger

	jobs:
	macos-m1-smoke-test:
	runs-on: macos-latest
	timeout-minutes: 20

	steps:
	- uses: actions/checkout@v4

	- uses: astral-sh/setup-uv@v7
	with:
	enable-cache: true
	cache-dependency-glob: \|
	requirements/*/.txt
	pyproject.toml
	python-version: '3.12'

	- name: Create virtual environment
	run: \|
	uv venv
	echo "$GITHUB_WORKSPACE/.venv/bin" >> "$GITHUB_PATH"

	- name: Install dependencies and build vLLM
	run: \|
	uv pip install -r requirements/cpu.txt --index-strategy unsafe-best-match
	uv pip install -e .
	env:
	CMAKE_BUILD_PARALLEL_LEVEL: 4

	- name: Verify installation
	run: \|
	python -c "import vllm; print(f'vLLM version: {vllm.__version__}')"
	python -c "import torch; print(f'PyTorch: {torch.__version__}')"

	- name: Smoke test vllm serve
	timeout-minutes: 10
	run: \|
	# Start server in background
	vllm serve Qwen/Qwen3-0.6B \
	--max-model-len=2048 \
	--load-format=dummy \
	--enforce-eager \
	--port 8000 &

	SERVER_PID=$!

	# Wait for server to start
	for i in {1..30}; do
	if curl -s http://localhost:8000/health > /dev/null; then
	echo "Server started successfully"
	break
	fi
	if [ "$i" -eq 30 ]; then
	echo "Server failed to start"
	kill "$SERVER_PID"
	exit 1
	fi
	sleep 2
	done

	# Test health endpoint
	curl -f http://localhost:8000/health

	# Test completion
	curl -f http://localhost:8000/v1/completions \
	-H "Content-Type: application/json" \
	-d '{
	"model": "Qwen/Qwen3-0.6B",
	"prompt": "Hello",
	"max_tokens": 5
	}'

	# Cleanup
	kill "$SERVER_PID"

# Provide feedback