python · advanced

Ray Serve ML Model Deployment

Deploy a scalable ML serving endpoint with Ray Serve, handling concurrent requests and model loading.

python
import time

import numpy as np
import ray
from fastapi import FastAPI
from fastapi import HTTPException
from pydantic import BaseModel
from ray import serve
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier

# Bring up Ray and Ray Serve before the deployment below is declared and run.
ray.init()
serve.start()

# FastAPI application; IrisPredictor registers its HTTP routes on it.
app = FastAPI()

@serve.deployment(num_replicas=2, ray_actor_options={'num_cpus': 0.5})
@serve.ingress(app)
class IrisPredictor:
    """Ray Serve deployment exposing an iris classifier over HTTP.

    Every instance fits its own ``RandomForestClassifier`` on the bundled
    iris dataset in ``__init__`` and answers ``POST /predict`` requests
    routed through the FastAPI ``app``.
    """

    def __init__(self):
        """Fit the classifier and record the label names and feature count."""
        X, y = load_iris(return_X_y=True)
        self.model = RandomForestClassifier(n_estimators=50, random_state=42).fit(X, y)
        self.classes = ['setosa','versicolor','virginica']
        # Width of the training matrix; a request must supply exactly this
        # many values or the model cannot score it.
        self.n_features = X.shape[1]

    class Input(BaseModel):
        """Request body for ``/predict``: one flat feature vector."""

        features: list[float]

    @app.post('/predict')
    def predict(self, data: Input) -> dict:
        """Classify a single feature vector.

        Args:
            data: Request body whose ``features`` list holds one sample.

        Returns:
            A dict with the predicted label under ``'class'`` and the
            per-class probabilities (same order as ``self.classes``) under
            ``'probabilities'``.

        Raises:
            HTTPException: 422 when the vector has the wrong length or
                contains NaN/infinite values.
        """
        row = np.asarray(data.features, dtype=float)

        # Reject malformed input here so the client gets a 4xx with a clear
        # message instead of an unhandled ValueError from scikit-learn.
        if row.shape != (self.n_features,):
            raise HTTPException(
                status_code=422,
                detail=f'expected {self.n_features} features, got {row.size}',
            )
        if not np.isfinite(row).all():
            raise HTTPException(
                status_code=422,
                detail='features must be finite numbers',
            )

        # One pass through the forest: the predicted class is the argmax of
        # the class probabilities, so a separate predict() call is redundant.
        proba = self.model.predict_proba(row.reshape(1, -1))[0]
        best = int(np.argmax(proba))
        return {'class': self.classes[best], 'probabilities': proba.tolist()}

# Deploy the application and keep the returned handle for in-process calls.
handle = serve.run(IrisPredictor.bind())
print('Serving at http://127.0.0.1:8000/predict')

# serve.run() returns without blocking by default. If this script started the
# Ray instance itself, letting the driver exit would take the endpoint down
# right after the message above, so idle here until interrupted.
try:
    while True:
        time.sleep(3600)
except KeyboardInterrupt:
    # Ctrl-C: tear the Serve application down cleanly before exiting.
    serve.shutdown()

Use Cases

  • ML serving
  • distributed deployment
  • scalable inference

Tags

Related Snippets

Similar patterns you can reuse in the same workflow.