-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathgateway.yaml.example
More file actions
151 lines (139 loc) · 3.18 KB
/
gateway.yaml.example
File metadata and controls
151 lines (139 loc) · 3.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
---
# LiteLLM-RS Gateway Configuration Example
# This example matches current `src/config/models/*` schema.
# Configuration schema version (quoted so "1.0" stays a string, not float 1.0)
schema_version: "1.0"
# HTTP server settings.
server:
  # 0.0.0.0 listens on all interfaces — restrict the bind address in production
  # if the gateway sits behind a reverse proxy.
  host: "0.0.0.0"
  port: 8000
  workers: 4
  max_connections: 1000
  timeout: 30  # presumably seconds — confirm against the server schema
  max_body_size: 10485760  # 10485760 bytes = 10 MiB
  dev_mode: false
  cors:
    enabled: true
    # IMPORTANT: empty list is restrictive (no cross-origin allowed).
    # Set explicit origins in production.
    allowed_origins:
      - "https://your-app.example.com"
    allowed_methods: ["GET", "POST", "PUT", "DELETE", "OPTIONS"]
    allowed_headers: ["authorization", "content-type", "x-requested-with"]
    max_age: 3600  # preflight cache lifetime; presumably seconds — confirm
    allow_credentials: false
# Upstream LLM providers. Each entry is one backend the router can select.
providers:
  - name: "openai-primary"
    provider_type: "openai"
    # Resolved from the environment — never commit a literal key here.
    api_key: "${OPENAI_API_KEY}"
    base_url: "https://api.openai.com/v1"
    weight: 10.0  # relative routing weight — presumably higher = more traffic; confirm
    rpm: 1000  # requests-per-minute cap
    tpm: 100000  # tokens-per-minute cap
    max_concurrent_requests: 100
    timeout: 30  # presumably seconds — confirm against the provider schema
    max_retries: 3
    retry:
      base_delay: 100  # presumably milliseconds — confirm
      max_delay: 5000
      backoff_multiplier: 2.0
      jitter: 0.1
    health_check:
      interval: 30
      failure_threshold: 5
      recovery_timeout: 60
      endpoint: null  # null → provider's default health endpoint — confirm
      expected_codes: [200]
    settings: {}  # provider-specific extras; empty map by default
    models:
      - "gpt-4o"
      - "gpt-4.1-mini"
    tags: ["primary", "production"]
    enabled: true
  - name: "anthropic-backup"
    provider_type: "anthropic"
    api_key: "${ANTHROPIC_API_KEY}"
    weight: 5.0
    rpm: 500
    tpm: 50000
    max_concurrent_requests: 50
    timeout: 30
    max_retries: 3
    settings: {}
    models:
      - "claude-3-5-sonnet-latest"
    tags: ["backup"]
    enabled: true
# Request routing across the providers above.
router:
  strategy:
    type: "least_latency"
  circuit_breaker:
    failure_threshold: 5  # failures before the breaker opens? confirm exact semantics
    recovery_timeout: 60  # presumably seconds before a half-open retry — confirm
    min_requests: 10
    success_threshold: 3
  load_balancer:
    health_check_enabled: true
    sticky_sessions: false
    session_timeout: 3600  # presumably seconds — confirm
# Persistence backends.
storage:
  database:
    url: "postgresql://localhost/litellm"
    max_connections: 10
    connection_timeout: 5  # presumably seconds — confirm
    ssl: false
    enabled: false  # database persistence disabled in this example
  redis:
    url: "redis://localhost:6379"
    enabled: true
    max_connections: 20
    connection_timeout: 5
    cluster: false  # set true for Redis Cluster deployments — confirm support
  vector_db: null  # no vector store configured
# Authentication / authorization.
auth:
  enable_jwt: true
  enable_api_key: true
  # NOTE(review): prefer an environment reference (e.g. "${JWT_SECRET}") over a
  # committed literal, matching the providers' api_key pattern. The placeholder
  # below must be replaced before production use (>= 32 chars, mixed case/symbols).
  jwt_secret: "ReplaceWithAtLeast32CharsMixedCaseAndSymbols123!"
  jwt_expiration: 86400  # 86400 = 24 h; presumably seconds — confirm
  api_key_header: "Authorization"
  rbac:
    enabled: false
    default_role: "user"
    admin_roles: ["admin", "superuser"]
# Observability: metrics, tracing, health endpoint.
monitoring:
  metrics:
    enabled: true
    port: 9090  # conventional Prometheus port — confirm exporter format
    path: "/metrics"
    interval_seconds: 15
  tracing:
    enabled: false
    endpoint: null  # collector endpoint; null presumably disables export — confirm
    service_name: "litellm-rs"
    sampling_rate: 0.1  # fraction of requests traced — confirm range is 0.0–1.0
    jaeger: null
  health:
    path: "/health"
    detailed: true
  logging: null  # null → service-default logging configuration — confirm
# Response cache (off by default in this example).
cache:
  enabled: false
  ttl: 3600  # presumably seconds — confirm
  max_size: 1000  # presumably an entry count, not bytes — confirm
  semantic_cache: false
  # Similarity cutoff for semantic-cache hits — confirm metric (e.g. cosine)
  similarity_threshold: 0.95
pricing:
  # PricingService source path/URL (set to null to use service default behavior)
  source: "config/model_prices_extended.json"
# Gateway-wide rate limiting.
rate_limit:
  enabled: true
  # Quoted for consistency with the file's other string values.
  strategy: "token_bucket"
  default_rpm: 1000
  default_tpm: 100000
  # Presumably null means "use the defaults above" for each override — confirm.
  requests_per_second: null
  requests_per_minute: null
  tokens_per_minute: null
  burst_size: null
# Enterprise feature flags (all disabled in this example).
enterprise:
  enabled: false
  sso: null
  audit_logging: false
  advanced_analytics: false