091 FastAPI로 모델 배포하기

키워드: FastAPI, 배포, REST API, 서빙

개요

FastAPI는 Python으로 고성능 REST API를 구축하는 현대적인 웹 프레임워크입니다. FLAML 모델을 FastAPI로 배포하면 실시간 예측 서비스를 쉽게 구축할 수 있습니다.

실습 환경

Python 버전: 3.11 권장
필요 패키지: flaml[automl], fastapi, uvicorn, pydantic, pandas, numpy, requests(클라이언트 테스트용)

pip install "flaml[automl]" fastapi uvicorn pydantic pandas numpy requests

모델 준비

import numpy as np
import joblib
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from flaml import AutoML

# Prepare the data and train the model
np.random.seed(42)
X, y = make_classification(
    n_samples=2000,
    n_features=10,
    random_state=42,
)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Run the FLAML search as a classification task with time_budget=30, silently
automl = AutoML()
automl.fit(
    X_train=X_train,
    y_train=y_train,
    task="classification",
    time_budget=30,
    verbose=0,
)

# Persist the fitted AutoML object to disk and report the chosen estimator
joblib.dump(automl, "api_model.pkl")
print(f"모델 저장 완료: {automl.best_estimator}")

기본 FastAPI 서버

# 091 app.py
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import joblib
import numpy as np
from typing import List

# Create the FastAPI application
app = FastAPI(
    title="FLAML Model API",
    description="FLAML AutoML 모델 예측 API",
    version="1.0.0"
)

# Load the serialized model from disk when this module is imported
model = joblib.load('api_model.pkl')

# Request schema: one sample given as a flat list of floats
class PredictionRequest(BaseModel):
    features: List[float]

    class Config:
        # Example payload attached to the generated schema.
        # NOTE(review): `schema_extra` is the Pydantic v1 key; v2 renamed it to
        # `json_schema_extra` — confirm which Pydantic version is installed.
        schema_extra = {
            "example": {
                "features": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
            }
        }

# Response schema: the predicted label plus the per-class probabilities
class PredictionResponse(BaseModel):
    prediction: int
    probability: List[float]

# Health check
@app.get("/health")
def health_check():
    # Report a fixed "healthy" status together with the model's best_estimator
    payload = {"status": "healthy"}
    payload["model"] = model.best_estimator
    return payload

# Single-sample prediction
@app.post("/predict", response_model=PredictionResponse)
def predict(request: PredictionRequest):
    # Any failure while predicting is reported to the client as HTTP 500
    try:
        # Shape the flat feature list into a single-row 2-D array
        sample = np.asarray(request.features)[np.newaxis, :]
        label = int(model.predict(sample)[0])
        class_scores = model.predict_proba(sample)[0].tolist()
        return PredictionResponse(prediction=label, probability=class_scores)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))

print("FastAPI 앱 정의 완료")
print("실행: uvicorn app:app --reload")

배치 예측 API

# Batch request schema: a list of samples, each a list of floats
class BatchPredictionRequest(BaseModel):
    data: List[List[float]]

# Batch response schema: one label and one probability list per input sample
class BatchPredictionResponse(BaseModel):
    predictions: List[int]
    probabilities: List[List[float]]

# Batch prediction endpoint
@app.post("/predict/batch", response_model=BatchPredictionResponse)
def predict_batch(request: BatchPredictionRequest):
    # Any failure while predicting is reported to the client as HTTP 500
    try:
        rows = np.array(request.data)
        labels = model.predict(rows)
        scores = model.predict_proba(rows)
        return BatchPredictionResponse(
            predictions=labels.tolist(),
            probabilities=scores.tolist(),
        )
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))

print("배치 예측 API 추가")

모델 정보 API

# Response schema for the model metadata endpoint
class ModelInfo(BaseModel):
    model_type: str
    best_estimator: str
    n_features: int
    classes: List[int]

@app.get("/model/info", response_model=ModelInfo)
def get_model_info():
    # Only best_estimator is read from the model; the feature count and class
    # labels are fixed values written into this handler.
    info = ModelInfo(
        model_type="FLAML AutoML",
        best_estimator=model.best_estimator,
        n_features=10,
        classes=[0, 1],
    )
    return info

print("모델 정보 API 추가")

완전한 FastAPI 앱 (app.py)

"""
# 091 app.py - 완전한 FLAML FastAPI 서버

from fastapi import FastAPI, HTTPException, BackgroundTasks
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
import joblib
import numpy as np
from typing import List, Optional
import logging
from datetime import datetime

# Logging configuration: INFO level, module-level logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# FastAPI application
app = FastAPI(
    title="FLAML Prediction API",
    description="AutoML 기반 예측 서비스",
    version="1.0.0"
)

# CORS configuration
# Wildcard origins must not be combined with credentialed requests: FastAPI's
# CORS docs require explicit origins/methods/headers when allow_credentials is
# True. Credentials are therefore disabled while every origin is allowed; list
# the trusted origins explicitly before turning allow_credentials back on.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Model holder: stays None until the startup hook below has run
model = None

@app.on_event("startup")
async def load_model():
    # Load the pickled model into the module-level `model` and log its estimator
    global model
    loaded = joblib.load('api_model.pkl')
    model = loaded
    logger.info(f"모델 로드 완료: {loaded.best_estimator}")

# Schemas
# Single-sample request: required list constrained to 10 items (min and max)
# NOTE(review): Pydantic v2 renamed `min_items`/`max_items` to
# `min_length`/`max_length` — confirm which Pydantic version is installed.
class PredictRequest(BaseModel):
    features: List[float] = Field(..., min_items=10, max_items=10)

# Single-sample response: label, a single probability value, the per-class
# probability list, and a timestamp string
class PredictResponse(BaseModel):
    prediction: int
    probability: float
    class_probabilities: List[float]
    timestamp: str

# Batch request: a list of samples, each a list of floats
class BatchRequest(BaseModel):
    data: List[List[float]]

# Batch response: the predicted labels and how many there are
class BatchResponse(BaseModel):
    predictions: List[int]
    count: int

# Endpoints
@app.get("/")
def root():
    # Static landing response
    return dict(message="FLAML Prediction API", status="running")

@app.get("/health")
def health():
    # `model_loaded` is False while the module-level `model` is still None
    is_loaded = model is not None
    now = datetime.now().isoformat()
    return {"status": "healthy", "model_loaded": is_loaded, "timestamp": now}

@app.post("/predict", response_model=PredictResponse)
def predict(request: PredictRequest):
    # Answer 503 while no model is available
    if model is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    # Any failure below is logged and reported to the client as HTTP 500
    try:
        # Shape the flat feature list into a single-row 2-D array
        sample = np.asarray(request.features)[np.newaxis, :]
        label = int(model.predict(sample)[0])
        class_scores = model.predict_proba(sample)[0]
        # `probability` is the score found at the index of the predicted label
        response = PredictResponse(
            prediction=label,
            probability=float(class_scores[label]),
            class_probabilities=class_scores.tolist(),
            timestamp=datetime.now().isoformat(),
        )
    except Exception as exc:
        logger.error(f"Prediction error: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
    return response

@app.post("/predict/batch", response_model=BatchResponse)
def predict_batch(request: BatchRequest):
    # Answer 503 while no model is available
    if model is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    # Any failure below is logged and reported to the client as HTTP 500
    try:
        rows = np.array(request.data)
        labels = model.predict(rows).astype(int).tolist()
        response = BatchResponse(predictions=labels, count=len(labels))
    except Exception as exc:
        logger.error(f"Batch prediction error: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
    return response

# Run with: uvicorn app:app --host 0.0.0.0 --port 8000
"""

print("완전한 FastAPI 앱 코드 (app.py)")

클라이언트 테스트

import requests

# API smoke-test helper
def test_api(base_url="http://localhost:8000", timeout=10):
    """Call the health, single-prediction and batch endpoints and print each reply.

    Args:
        base_url: Root URL of the running API server.
        timeout: Seconds to wait on each HTTP request. `requests` applies no
            timeout by default, so without it an unresponsive server would
            block this function indefinitely.

    Raises:
        requests.exceptions.RequestException: If a request times out or the
            server cannot be reached.
    """

    # 1. Health check
    print("1. 헬스 체크:")
    response = requests.get(f"{base_url}/health", timeout=timeout)
    print(f"   상태: {response.json()}")

    # 2. Single prediction
    print("\n2. 단일 예측:")
    data = {"features": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]}
    response = requests.post(f"{base_url}/predict", json=data, timeout=timeout)
    print(f"   결과: {response.json()}")

    # 3. Batch prediction
    print("\n3. 배치 예측:")
    batch_data = {
        "data": [
            [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
            [1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1]
        ]
    }
    response = requests.post(
        f"{base_url}/predict/batch", json=batch_data, timeout=timeout
    )
    print(f"   결과: {response.json()}")

# Run the smoke test (only while the server is running)
# test_api()
print("\n서버 실행 후 test_api() 함수로 테스트하세요")

비동기 예측

"""
# 091 비동기 예측을 위한 백그라운드 작업

from fastapi import BackgroundTasks
import asyncio

# In-memory task store, keyed by task id
tasks_store = {}

# Async request: a list of samples plus an optional URL to POST the result to
class AsyncPredictRequest(BaseModel):
    features: List[List[float]]
    callback_url: Optional[str] = None

@app.post("/predict/async")
async def predict_async(request: AsyncPredictRequest, background_tasks: BackgroundTasks):
    # Imported here because this snippet never imports uuid at module level;
    # without it the uuid.uuid4() call below raises NameError.
    import uuid

    # Register the task as "processing" before scheduling the background job
    task_id = str(uuid.uuid4())
    tasks_store[task_id] = {"status": "processing"}

    background_tasks.add_task(
        process_prediction,
        task_id,
        request.features,
        request.callback_url
    )

    return {"task_id": task_id, "status": "accepted"}

async def process_prediction(task_id, features, callback_url):
    # Background job: run the prediction, store the outcome under task_id in
    # tasks_store, then optionally POST the stored record to callback_url.
    #
    # task_id:      key of the record to update in tasks_store
    # features:     list of samples passed to model.predict
    # callback_url: URL to notify once the result is stored, or None
    #
    # Imported here because this snippet never imports requests at module level.
    import requests

    try:
        X = np.array(features)
        # model.predict is a blocking call; run it in a worker thread so the
        # event loop can keep serving other requests meanwhile.
        predictions = (await asyncio.to_thread(model.predict, X)).tolist()
    except Exception as e:
        tasks_store[task_id] = {"status": "failed", "error": str(e)}
        return

    tasks_store[task_id] = {
        "status": "completed",
        "predictions": predictions
    }

    if callback_url:
        # NOTE(review): callback_url comes straight from the client, so the
        # server will POST to any address it is given — restrict the allowed
        # hosts before exposing this endpoint publicly.
        try:
            # requests.post blocks as well, so it also runs in a worker thread;
            # the timeout keeps an unresponsive callback host from pinning it.
            await asyncio.to_thread(
                requests.post,
                callback_url,
                json=tasks_store[task_id],
                timeout=10,
            )
        except Exception as e:
            # A failed notification must not turn a completed prediction into
            # a failed task; keep the result and record the delivery error.
            tasks_store[task_id]["callback_error"] = str(e)

@app.get("/predict/async/{task_id}")
def get_async_result(task_id: str):
    # Return the stored record as-is; an unknown id is answered with 404
    try:
        return tasks_store[task_id]
    except KeyError:
        raise HTTPException(status_code=404, detail="Task not found")
"""

print("비동기 예측 API 코드")

인증 추가

"""
# 091 API 키 인증

from fastapi.security import APIKeyHeader
from fastapi import Security, Depends

# Set of accepted API keys.
# NOTE(review): the key is hard-coded in source — presumably a demo value to be
# replaced by configuration outside the code; confirm before deploying.
API_KEYS = {"valid-api-key-123"}
# Header scheme that reads the key from the X-API-Key header
api_key_header = APIKeyHeader(name="X-API-Key")

async def verify_api_key(api_key: str = Security(api_key_header)):
    # Dependency that accepts the request only when the supplied key matches
    # one of API_KEYS; returns the key on success, raises HTTP 403 otherwise.
    #
    # Imported here because this snippet does not import secrets at module level.
    import secrets

    # Compare in constant time so response timing does not reveal how much of
    # a guessed key is correct. Keys are encoded to bytes because
    # compare_digest rejects str values containing non-ASCII characters.
    supplied = api_key.encode("utf-8")
    is_valid = any(
        secrets.compare_digest(supplied, known.encode("utf-8"))
        for known in API_KEYS
    )
    if not is_valid:
        raise HTTPException(status_code=403, detail="Invalid API Key")
    return api_key

@app.post("/predict/secure")
def predict_secure(
    request: PredictRequest,
    api_key: str = Depends(verify_api_key)
):
    # Only authenticated requests get here: verify_api_key runs first as a
    # dependency and rejects invalid keys.
    # Answer 503 while no model is available, matching the /predict and
    # /predict/batch endpoints, instead of failing on None.predict.
    if model is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    # Shape the flat feature list into a single-row 2-D array
    X = np.array(request.features).reshape(1, -1)
    return {"prediction": int(model.predict(X)[0])}
"""

print("API 키 인증 코드")

실행 명령어

# Cheat sheet of server run / docs / test commands; printed verbatim below
commands = """
=== FastAPI 서버 실행 방법 ===

1. 개발 서버 (자동 리로드):
   uvicorn app:app --reload --host 0.0.0.0 --port 8000

2. 프로덕션 서버 (Gunicorn + Uvicorn):
   gunicorn app:app -w 4 -k uvicorn.workers.UvicornWorker -b 0.0.0.0:8000

3. API 문서:
   - Swagger UI: http://localhost:8000/docs
   - ReDoc: http://localhost:8000/redoc

4. 테스트:
   curl -X POST "http://localhost:8000/predict" \\
        -H "Content-Type: application/json" \\
        -d '{"features": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]}'
"""

print(commands)

정리

FastAPI: 고성능 Python API 프레임워크
Pydantic: 요청/응답 스키마 정의
단일/배치 예측: POST 엔드포인트
비동기: 대용량 요청 처리
인증: API 키 기반 보안

다음 글 예고

다음 글에서는 Docker로 배포 환경 구성을 알아봅니다. 컨테이너화를 통한 일관된 배포 환경을 구축합니다.

FLAML AutoML 마스터 시리즈 #091

개요​

실습 환경​

모델 준비​

기본 FastAPI 서버​

배치 예측 API​

모델 정보 API​

완전한 FastAPI 앱 (app.py)​

클라이언트 테스트​

비동기 예측​

인증 추가​

실행 명령어​

정리​

다음 글 예고​

개요

실습 환경

모델 준비

기본 FastAPI 서버

배치 예측 API

모델 정보 API

완전한 FastAPI 앱 (app.py)

클라이언트 테스트

비동기 예측

인증 추가

실행 명령어

정리

다음 글 예고