-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathgateway.yaml.example
More file actions
151 lines (139 loc) · 3.18 KB
/
gateway.yaml.example
File metadata and controls
151 lines (139 loc) · 3.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
---
# LiteLLM-RS Gateway Configuration Example
# This example matches current `src/config/models/*` schema.
# Configuration schema version (quoted so "1.0" stays a string, not float 1.0)
schema_version: "1.0"
# HTTP server settings.
server:
  # 0.0.0.0 listens on all interfaces — restrict the bind address in production
  # if the gateway sits behind a reverse proxy.
  host: "0.0.0.0"
  port: 8000
  workers: 4
  max_connections: 1000
  timeout: 30  # presumably seconds — confirm against the server schema
  max_body_size: 10485760  # 10485760 bytes = 10 MiB
  dev_mode: false
  cors:
    enabled: true
    # IMPORTANT: empty list is restrictive (no cross-origin allowed).
    # Set explicit origins in production.
    allowed_origins:
      - "https://your-app.example.com"
    allowed_methods: ["GET", "POST", "PUT", "DELETE", "OPTIONS"]
    allowed_headers: ["authorization", "content-type", "x-requested-with"]
    max_age: 3600  # preflight cache lifetime; presumably seconds — confirm
    allow_credentials: false
# Upstream LLM providers. Each entry is one backend the router can select.
providers:
  - name: "openai-primary"
    provider_type: "openai"
    # Resolved from the environment — never commit a literal key here.
    api_key: "${OPENAI_API_KEY}"
    base_url: "https://api.openai.com/v1"
    weight: 10.0  # relative routing weight — presumably higher = more traffic; confirm
    rpm: 1000  # requests-per-minute cap
    tpm: 100000  # tokens-per-minute cap
    max_concurrent_requests: 100
    timeout: 30  # presumably seconds — confirm against the provider schema
    max_retries: 3
    retry:
      base_delay: 100  # presumably milliseconds — confirm
      max_delay: 5000
      backoff_multiplier: 2.0
      jitter: 0.1
    health_check:
      interval: 30
      failure_threshold: 5
      recovery_timeout: 60
      endpoint: null  # null → provider's default health endpoint — confirm
      expected_codes: [200]
    settings: {}  # provider-specific extras; empty map by default
    models:
      - "gpt-4o"
      - "gpt-4.1-mini"
    tags: ["primary", "production"]
    enabled: true
  - name: "anthropic-backup"
    provider_type: "anthropic"
    api_key: "${ANTHROPIC_API_KEY}"
    weight: 5.0
    rpm: 500
    tpm: 50000
    max_concurrent_requests: 50
    timeout: 30
    max_retries: 3
    settings: {}
    models:
      - "claude-3-5-sonnet-latest"
    tags: ["backup"]
    enabled: true
# Request routing across the providers above.
router:
  strategy:
    type: "least_latency"
  circuit_breaker:
    failure_threshold: 5  # failures before the breaker opens? confirm exact semantics
    recovery_timeout: 60  # presumably seconds before a half-open retry — confirm
    min_requests: 10
    success_threshold: 3
  load_balancer:
    health_check_enabled: true
    sticky_sessions: false
    session_timeout: 3600  # presumably seconds — confirm
# Persistence backends.
storage:
  database:
    url: "postgresql://localhost/litellm"
    max_connections: 10
    connection_timeout: 5  # presumably seconds — confirm
    ssl: false
    enabled: false  # database persistence disabled in this example
  redis:
    url: "redis://localhost:6379"
    enabled: true
    max_connections: 20
    connection_timeout: 5
    cluster: false  # set true for Redis Cluster deployments — confirm support
  vector_db: null  # no vector store configured
# Authentication / authorization.
auth:
  enable_jwt: true
  enable_api_key: true
  # NOTE(review): prefer an environment reference (e.g. "${JWT_SECRET}") over a
  # committed literal, matching the providers' api_key pattern. The placeholder
  # below must be replaced before production use (>= 32 chars, mixed case/symbols).
  jwt_secret: "ReplaceWithAtLeast32CharsMixedCaseAndSymbols123!"
  jwt_expiration: 86400  # 86400 = 24 h; presumably seconds — confirm
  api_key_header: "Authorization"
  rbac:
    enabled: false
    default_role: "user"
    admin_roles: ["admin", "superuser"]
# Observability: metrics, tracing, health endpoint.
monitoring:
  metrics:
    enabled: true
    port: 9090  # conventional Prometheus port — confirm exporter format
    path: "/metrics"
    interval_seconds: 15
  tracing:
    enabled: false
    endpoint: null  # collector endpoint; null presumably disables export — confirm
    service_name: "litellm-rs"
    sampling_rate: 0.1  # fraction of requests traced — confirm range is 0.0–1.0
    jaeger: null
  health:
    path: "/health"
    detailed: true
  logging: null  # null → service-default logging configuration — confirm
# Response cache (off by default in this example).
cache:
  enabled: false
  ttl: 3600  # presumably seconds — confirm
  max_size: 1000  # presumably an entry count, not bytes — confirm
  semantic_cache: false
  # Similarity cutoff for semantic-cache hits — confirm metric (e.g. cosine)
  similarity_threshold: 0.95
pricing:
  # PricingService source path/URL (set to null to use service default behavior)
  source: "config/model_prices_extended.json"
# Gateway-wide rate limiting.
rate_limit:
  enabled: true
  # Quoted for consistency with the file's other string values.
  strategy: "token_bucket"
  default_rpm: 1000
  default_tpm: 100000
  # Presumably null means "use the defaults above" for each override — confirm.
  requests_per_second: null
  requests_per_minute: null
  tokens_per_minute: null
  burst_size: null
# Enterprise feature flags (all disabled in this example).
enterprise:
  enabled: false
  sso: null
  audit_logging: false
  advanced_analytics: false