Integrate with LiteLLM
Architecture:
Client -> Gateway -> Fairvisor Edge -> LiteLLM -> LLM Provider
Fairvisor enforces limits before traffic reaches LiteLLM.
Example policy bundle snippet (save as `policy.json`; it is mounted into the container in the Compose example below):
{
  "bundle_version": 1,
  "policies": [
    {
      "id": "litellm-chat",
      "spec": {
        "selector": { "pathPrefix": "/v1/chat/completions", "methods": ["POST"] },
        "rules": [
          {
            "name": "org-tpm",
            "limit_keys": ["header:x-litellm-user"],
            "algorithm": "token_bucket_llm",
            "algorithm_config": {
              "tokens_per_minute": 120000,
              "tokens_per_day": 2000000,
              "burst_tokens": 120000
            }
          }
        ]
      }
    }
  ]
}
Docker Compose example (`docker-compose.yaml`):
services:
  # Edge proxy: enforces rate-limit policy before traffic reaches LiteLLM.
  fairvisor:
    # NOTE(review): pin an explicit version tag instead of :latest for
    # reproducible deployments — confirm the desired release.
    image: ghcr.io/fairvisor/edge:latest
    ports:
      # Port mappings are quoted: unquoted digit:digit values can hit
      # YAML 1.1 sexagesimal parsing.
      - "8080:8080"
    environment:
      FAIRVISOR_MODE: reverse_proxy
      FAIRVISOR_CONFIG_FILE: /etc/fairvisor/policy.json
      # Service-name DNS on the default Compose network.
      FAIRVISOR_BACKEND_URL: http://litellm:4000
    volumes:
      # Policy bundle mounted read-only (see snippet above).
      - ./policy.json:/etc/fairvisor/policy.json:ro
    # Fairvisor proxies to litellm — start the backend first.
    depends_on:
      - litellm

  # LiteLLM gateway: fans requests out to the configured LLM providers.
  litellm:
    image: ghcr.io/berriai/litellm:latest
    ports:
      - "4000:4000"