python · intermediate

ONNX Runtime Fast ML Inference

Export a PyTorch model to ONNX and run fast CPU inference with ONNX Runtime.

python — Press ⌘/Ctrl + Shift + C to copy

import torch
import torch.nn as nn
import onnxruntime as ort
import numpy as np

class SimpleModel(nn.Module):
    """Small feed-forward network: 10 input features -> 64 hidden units -> 3 outputs."""

    def __init__(self):
        super().__init__()
        # Build the layers in the order they are applied, then chain them
        # under the single ``net`` attribute.
        hidden = nn.Linear(10, 64)
        activation = nn.ReLU()
        head = nn.Linear(64, 3)
        self.net = nn.Sequential(hidden, activation, head)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the result."""
        out = self.net(x)
        return out

# Instantiate the network and switch it to evaluation mode before exporting.
model = SimpleModel().eval()

# Export to ONNX.
# The example input only drives the export; axis 0 of both the input and the
# output is declared dynamic (named 'batch') so the exported graph is not
# tied to the example's batch size of 1.
dummy = torch.randn(1, 10)
batch_axis = {0: 'batch'}
torch.onnx.export(
    model,
    dummy,
    'model.onnx',
    input_names=['input'],
    output_names=['output'],
    dynamic_axes={'input': dict(batch_axis), 'output': dict(batch_axis)},
)

# Run inference with ONNX Runtime, restricted to the CPU execution provider.
ort_session = ort.InferenceSession('model.onnx', providers=['CPUExecutionProvider'])

# Feed a float32 batch of 8 rows, keyed by the input name used at export time.
batch = np.random.randn(8, 10).astype(np.float32)
feeds = {'input': batch}
# Passing None as the first argument requests all outputs of the model.
result = ort_session.run(None, feeds)
print('ONNX output shape:', result[0].shape)  # (8, 3)