This guide covers all installation methods for Nano Qwen3 Serving, from basic pip installation to advanced development setups.
The easiest way to install Nano Qwen3 Serving:
pip install nano-qwen3-serving
Verify installation:
python -c "import nano_qwen3_serving; print('Installation successful!')"
For development or custom modifications:
# Clone the repository
git clone https://github.com/hsliuustc/nano-qwen3-serving.git
cd nano-qwen3-serving
# Install in development mode
pip install -e .
# Or install with development dependencies
pip install -e ".[dev]"
Using conda for environment management:
# Create a new conda environment
conda create -n nano-qwen3 python=3.11
conda activate nano-qwen3
# Install the package
pip install nano-qwen3-serving
For containerized deployment:
# Pull the Docker image
docker pull hsliuustc/nano-qwen3-serving:latest
# Run the container
docker run -p 8000:8000 hsliuustc/nano-qwen3-serving:latest
The core runtime dependencies are installed automatically with the package.
Install additional features:
# Development dependencies
pip install "nano-qwen3-serving[dev]"
# Testing dependencies
pip install "nano-qwen3-serving[test]"
# All dependencies
pip install "nano-qwen3-serving[all]"
Using venv (recommended):
# Create virtual environment
python -m venv nano-qwen3-env
# Activate environment
source nano-qwen3-env/bin/activate # macOS/Linux
# nano-qwen3-env\Scripts\activate # Windows
# Install package
pip install nano-qwen3-serving
Using conda:
# Create conda environment
conda create -n nano-qwen3 python=3.11
# Activate environment
conda activate nano-qwen3
# Install package
pip install nano-qwen3-serving
Set optional environment variables:
# Server configuration
export NANO_QWEN3_PORT=8000
export NANO_QWEN3_HOST=127.0.0.1
export NANO_QWEN3_MODEL=Qwen/Qwen3-0.6B
export NANO_QWEN3_DEVICE=mps
# Logging
export NANO_QWEN3_LOG_LEVEL=info
# Hugging Face
export HUGGING_FACE_HUB_TOKEN=your_token_here
export HF_HOME=./models # Custom model cache directory
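These variables are read at server startup. As a rough sketch of how they might be resolved in Python (the variable names come from the list above; the fallback defaults are assumptions):
import os
# Hypothetical settings resolution; the default values are assumptions
port = int(os.environ.get("NANO_QWEN3_PORT", "8000"))
host = os.environ.get("NANO_QWEN3_HOST", "127.0.0.1")
model = os.environ.get("NANO_QWEN3_MODEL", "Qwen/Qwen3-0.6B")
device = os.environ.get("NANO_QWEN3_DEVICE", "mps")
print(f"Would serve {model} on {host}:{port} using device {device}")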
Configure where models are stored:
# Set Hugging Face cache directory
export HF_HOME=./models
# Or use Python
import os
os.environ['HF_HOME'] = './models'
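Note that huggingface_hub reads HF_HOME when it is first imported, so the variable must be set before transformers is imported; a minimal sketch:
import os
os.environ['HF_HOME'] = './models'  # must run before the transformers import below
from transformers import AutoTokenizer
# Downloads (or reuses) the tokenizer files under ./models
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-0.6B")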
Check if you have Apple Silicon:
# Check processor architecture
uname -m
# Should return: arm64
# Check processor type
sysctl -n machdep.cpu.brand_string
# Should contain: Apple M1, M2, or M3
# Install PyTorch with MPS support
pip install torch torchvision torchaudio
# Verify MPS availability
python -c "import torch; print(f'MPS available: {torch.backends.mps.is_available()}')"
import torch

# Check MPS availability and pick a device
if torch.backends.mps.is_available():
    device = torch.device("mps")
    print("MPS is available!")
else:
    device = torch.device("cpu")
    print("MPS not available, using CPU")
Test the installation:
# Check if package is installed
python -c "import nano_qwen3_serving; print('Package installed successfully')"
# Check version
python -c "import nano_qwen3_serving; print(nano_qwen3_serving.__version__)"
Test model downloading:
from transformers import AutoTokenizer, AutoModelForCausalLM
# Test small model download
model_name = "Qwen/Qwen3-0.6B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
print("Model downloaded successfully!")
Test the server:
# Start server in background
python -m nano_qwen3_serving --port 8001 &
# Test health endpoint
curl http://localhost:8001/health
# Stop server
pkill -f "nano_qwen3_serving"
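The same health check can be done from Python, assuming the /health endpoint shown above (requires the requests package):
import requests
# Query the health endpoint of the server started above
resp = requests.get("http://localhost:8001/health", timeout=5)
print(resp.status_code, resp.text)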
Set up a development environment:
git clone https://github.com/hsliuustc/nano-qwen3-serving.git
cd nano-qwen3-serving
# Install in development mode
pip install -e ".[dev]"
# Or install manually
pip install -e .
pip install pytest pytest-asyncio black isort mypy
# Install pre-commit
pip install pre-commit
# Install hooks
pre-commit install
# Run all tests
pytest
# Run with coverage
pytest --cov=nano_qwen3_serving
# Run specific test file
pytest tests/test_basic.py
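pytest picks up any file matching tests/test_*.py, so a new test can be as small as this hypothetical smoke test (file name and assertion are illustrative):
# tests/test_smoke.py
import nano_qwen3_serving

def test_package_imports():
    assert nano_qwen3_serving is not None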
# Build from source
docker build -t nano-qwen3-serving .
# Or pull from registry
docker pull hsliuustc/nano-qwen3-serving:latest
# Basic run
docker run -p 8000:8000 nano-qwen3-serving
# With custom model
docker run -p 8000:8000 -e NANO_QWEN3_MODEL=Qwen/Qwen3-1.7B nano-qwen3-serving
# With volume for model cache
docker run -p 8000:8000 -v ./models:/app/models nano-qwen3-serving
Create docker-compose.yml:
version: '3.8'
services:
  nano-qwen3:
    image: hsliuustc/nano-qwen3-serving:latest
    ports:
      - "8000:8000"
    environment:
      - NANO_QWEN3_MODEL=Qwen/Qwen3-0.6B
      - NANO_QWEN3_DEVICE=cpu  # MPS is not reachable from inside a Linux container
    volumes:
      - ./models:/app/models
    restart: unless-stopped
Run with:
docker-compose up -d
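The container may take a while to load the model, so a short readiness poll is useful before sending requests (a standard-library sketch; assumes the same /health endpoint as above on the mapped port 8000):
import time
import urllib.request

# Retry /health until the service answers or we give up after ~30s
for _ in range(30):
    try:
        with urllib.request.urlopen("http://localhost:8000/health", timeout=2) as resp:
            print("Server ready:", resp.status)
            break
    except OSError:
        time.sleep(1)
else:
    print("Server did not become ready")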
Common issues and fixes:
# Permission errors: install to the user site-packages
pip install --user nano-qwen3-serving
# MPS not detected: reinstall PyTorch
pip uninstall torch torchvision torchaudio
pip install torch torchvision torchaudio
# Gated or rate-limited model downloads: set a Hugging Face token
export HUGGING_FACE_HUB_TOKEN=your_token
# Or use a mirror
export HF_ENDPOINT=https://hf-mirror.com
# Out-of-memory errors: use a smaller model
python -m nano_qwen3_serving --model Qwen/Qwen3-0.6B
🚀 Installation complete! You're ready to start using Nano Qwen3 Serving!