發布時間:2025-10-16
# ------------------------------------------------------------------
# phi3_server.py — minimal Flask API around a locally downloaded
# Phi-3-mini-4K-Instruct model (private AI assistant).
#
# One-time environment setup (shell commands, run BEFORE this script):
#   # Install Python, venv tooling and build deps (Ubuntu/Debian):
#   sudo apt update && sudo apt install -y python3 python3-venv python3-pip git gcc
#   # Create and activate a virtual environment (same activate line on CentOS):
#   python3 -m venv phi3-env
#   source phi3-env/bin/activate
#   # Upgrade pip:
#   pip install --upgrade pip
#   # CPU-only PyTorch build (suits low-spec machines):
#   pip3 install torch==2.1.0+cpu torchvision==0.16.0+cpu torchaudio==2.1.0+cpu \
#       -f https://download.pytorch.org/whl/cpu/torch_stable.html
#   # Model runtime deps (flask serves the HTTP API):
#   pip install transformers==4.38.2 accelerate==0.30.1 sentencepiece==0.1.99 flask==2.3.3
#   # Optional: Hugging Face CLI for faster downloads:
#   pip install "huggingface-hub[cli]"
#   # Log in (get an access token at https://huggingface.co/settings/tokens):
#   huggingface-cli login
#   # Download the model (~4 GB; best fit for an 8 GB-RAM box):
#   huggingface-cli download microsoft/Phi-3-mini-4K-Instruct \
#       --local-dir phi3-model --local-dir-use-symlinks False --revision main
# ------------------------------------------------------------------

import torch  # BUGFIX: torch.float32 is used below but torch was never imported
from flask import Flask, request, jsonify
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

app = Flask(__name__)

# Quantization config — the key to keeping memory usage low.
# NOTE(review): load_in_4bit relies on the bitsandbytes package, which
# normally requires a CUDA GPU; on a CPU-only server this load may fail —
# confirm, or drop quantization_config and load the model unquantized.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                    # enable 4-bit quantization
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float32,
)

# Load tokenizer and model from the locally downloaded directory.
tokenizer = AutoTokenizer.from_pretrained("./phi3-model")
model = AutoModelForCausalLM.from_pretrained(
    "./phi3-model",
    quantization_config=bnb_config,
    device_map="auto",        # automatic device placement (CPU here)
    trust_remote_code=True,
)


def generate_response(prompt, max_new_tokens=512, temperature=0.7):
    """Core assistant logic.

    Wrap *prompt* in the Phi-3 chat template, sample a completion, and
    return only the text after the assistant marker.
    """
    inputs = tokenizer(
        f"<|user|>\n{prompt}\n<|assistant|>",
        return_tensors="pt",
        truncation=True,
        max_length=4096,      # Phi-3-mini-4K context window
    ).to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        do_sample=True,
        eos_token_id=tokenizer.eos_token_id,
    )
    # Decode the full sequence, then keep only the assistant's reply.
    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return text.split("<|assistant|>")[-1].strip()


@app.route("/api/chat", methods=["POST"])
def chat_api():
    """HTTP endpoint: POST {"prompt": "..."} -> {"response": "..."}."""
    # BUGFIX: request.json errors out on a missing/invalid JSON body;
    # get_json(silent=True) degrades gracefully to the default prompt.
    data = request.get_json(silent=True) or {}
    prompt = data.get("prompt", "請介紹一下自己")
    return jsonify({"response": generate_response(prompt)})


if __name__ == "__main__":
    # Bind on all interfaces, port 5000; debug off for production use.
    app.run(host="0.0.0.0", port=5000, debug=False)

# ------------------------------------------------------------------
# Running and testing (shell commands):
#   # Start in the background so it survives the terminal closing:
#   nohup python phi3_server.py > phi3.log 2>&1 &
#   # Watch the startup log; success shows "Running on http://0.0.0.0:5000":
#   tail -f phi3.log
#   # Smoke-test the API with curl (replace with your server's IP):
#   curl -X POST http://<server-ip>:5000/api/chat \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "請幫我寫一個Python爬蟲腳本,爬取網頁標題"}'
# ------------------------------------------------------------------
# BUGFIX: the original commands were fused together without separators
# ("/swapfilesudo chmod ...") and could not execute; split into valid lines.

# Add a 4 GB swap file — headroom for model loading on low-RAM machines.
sudo fallocate -l 4G /swapfile
sudo chmod 600 /swapfile          # swap files must not be world-readable
sudo mkswap /swapfile && sudo swapon /swapfile

# Serve with gunicorn instead of Flask's built-in dev server.
pip install gunicorn
# 2 worker processes to match a 2-core CPU.
gunicorn -w 2 -b 0.0.0.0:5000 phi3_server:app