Integrate with LiteLLM

Architecture:

Client -> Gateway -> Fairvisor Edge -> LiteLLM -> LLM Provider

Fairvisor enforces rate limits before traffic reaches LiteLLM.

Example policy bundle snippet:

{
  "bundle_version": 1,
  "policies": [
    {
      "id": "litellm-chat",
      "spec": {
        "selector": { "pathPrefix": "/v1/chat/completions", "methods": ["POST"] },
        "rules": [
          {
            "name": "org-tpm",
            "limit_keys": ["header:x-litellm-user"],
            "algorithm": "token_bucket_llm",
            "algorithm_config": {
              "tokens_per_minute": 120000,
              "tokens_per_day": 2000000,
              "burst_tokens": 120000
            }
          }
        ]
      }
    }
  ]
}

Compose example

services:
  fairvisor:
    image: ghcr.io/fairvisor/edge:latest
    ports: ["8080:8080"]
    environment:
      # Run as a reverse proxy in front of the LiteLLM backend.
      FAIRVISOR_MODE: reverse_proxy
      FAIRVISOR_CONFIG_FILE: /etc/fairvisor/policy.json
      # Service name resolves on the default Compose network.
      FAIRVISOR_BACKEND_URL: http://litellm:4000
    volumes:
      # Mount the policy bundle read-only.
      - ./policy.json:/etc/fairvisor/policy.json:ro
    # Start the backend before the proxy so FAIRVISOR_BACKEND_URL
    # is resolvable at startup (ordering only, not a health check).
    depends_on:
      - litellm

  litellm:
    image: ghcr.io/berriai/litellm:latest
    ports: ["4000:4000"]