Authenticated API
This endpoint requires a valid JWT Bearer token. Accessible via the API gateway at /v1/ai/*.
AI Cost Management API
Track AI model usage, manage costs, set budgets, and optimize spending across all AI services.
Overview
| Attribute | Value |
|---|---|
| Base Path | /api/v1/ai/costs |
| Authentication | Bearer Token |
| Required Roles | analytics_admin, platform_admin, system_admin, super_admin, tenant_admin |
Model Tiers & Pricing
Get Model Pricing
Retrieve current AI model pricing tiers.
GET /api/v1/ai/costs/pricing
Response
{
"pricing_version": "2026-01",
"tiers": [
{
"tier": "T1",
"name": "Free Tier",
"model": "llama-4-scout",
"provider": "workers_ai",
"input_cost_per_million": 0,
"output_cost_per_million": 0,
"use_cases": ["simple_queries", "greetings", "faq"]
},
{
"tier": "T2",
"name": "Fast Tier",
"model": "gemini-2.0-flash",
"provider": "google",
"input_cost_per_million": 0.10,
"output_cost_per_million": 0.40,
"use_cases": ["real_time_conversation", "quick_analysis"]
},
{
"tier": "T3",
"name": "Standard Tier",
"model": "gemini-3-flash",
"provider": "google",
"input_cost_per_million": 0.50,
"output_cost_per_million": 3.00,
"use_cases": ["complex_conversation", "detailed_analysis"]
},
{
"tier": "T4",
"name": "Reasoning Tier",
"model": "claude-haiku-4.5",
"provider": "anthropic",
"input_cost_per_million": 1.00,
"output_cost_per_million": 5.00,
"use_cases": ["fast_reasoning", "multi_step_tasks"]
},
{
"tier": "T5",
"name": "Quality Tier",
"model": "claude-sonnet-4.5",
"provider": "anthropic",
"input_cost_per_million": 3.00,
"output_cost_per_million": 15.00,
"use_cases": ["high_quality_analysis", "complex_reasoning"]
},
{
"tier": "T6",
"name": "Premium Tier",
"model": "claude-opus-4.5",
"provider": "anthropic",
"input_cost_per_million": 5.00,
"output_cost_per_million": 25.00,
"use_cases": ["strategic_planning", "expert_analysis"]
}
],
"effective_date": "2026-01-01"
}
Usage Tracking
Get Usage Summary
Retrieve AI usage summary for a tenant.
GET /api/v1/ai/costs/usage
Query Parameters
| Parameter | Type | Description |
|---|---|---|
| tenant_id | uuid | Tenant ID |
| start_date | date | Period start |
| end_date | date | Period end |
| granularity | string | hour, day, week, month |
Response
{
"tenant_id": "tenant_123",
"period": {
"start": "2026-01-01",
"end": "2026-01-24"
},
"summary": {
"total_requests": 125000,
"total_input_tokens": 45000000,
"total_output_tokens": 12000000,
"total_cost": 485.50,
"avg_cost_per_request": 0.0039
},
"by_tier": [
{
"tier": "T1",
"requests": 85000,
"input_tokens": 25000000,
"output_tokens": 8000000,
"cost": 0
},
{
"tier": "T2",
"requests": 25000,
"input_tokens": 12000000,
"output_tokens": 2500000,
"cost": 2.20
},
{
"tier": "T3",
"requests": 10000,
"input_tokens": 5000000,
"output_tokens": 1000000,
"cost": 5.50
},
{
"tier": "T4",
"requests": 3500,
"input_tokens": 2000000,
"output_tokens": 350000,
"cost": 3.75
},
{
"tier": "T5",
"requests": 1200,
"input_tokens": 800000,
"output_tokens": 120000,
"cost": 4.20
},
{
"tier": "T6",
"requests": 300,
"input_tokens": 200000,
"output_tokens": 30000,
"cost": 1.75
}
],
"by_service": [
{
"service": "voice_ai",
"requests": 45000,
"cost": 125.00
},
{
"service": "drive_thru",
"requests": 35000,
"cost": 180.00
},
{
"service": "chat_assistant",
"requests": 25000,
"cost": 95.50
},
{
"service": "analytics",
"requests": 20000,
"cost": 85.00
}
]
}
Get Usage by Day
GET /api/v1/ai/costs/usage/daily
Response
{
"data": [
{
"date": "2026-01-24",
"requests": 5200,
"input_tokens": 1850000,
"output_tokens": 520000,
"cost": 22.50,
"by_tier": {
"T1": 3500,
"T2": 1200,
"T3": 350,
"T4": 120,
"T5": 25,
"T6": 5
}
}
]
}
Get Real-time Usage
GET /api/v1/ai/costs/usage/realtime
Response
{
"timestamp": "2026-01-24T19:30:00Z",
"current_hour": {
"requests": 450,
"cost": 2.15,
"trend": "normal"
},
"today": {
"requests": 5200,
"cost": 22.50,
"budget_percent_used": 45
},
"this_month": {
"requests": 125000,
"cost": 485.50,
"budget_percent_used": 48.5
},
"alerts": []
}
Budget Management
Get Budget
Retrieve budget configuration.
GET /api/v1/ai/costs/budget
Response
{
"tenant_id": "tenant_123",
"budgets": {
"daily": {
"limit": 50.00,
"current": 22.50,
"percent_used": 45,
"action_on_exceed": "alert"
},
"monthly": {
"limit": 1000.00,
"current": 485.50,
"percent_used": 48.5,
"action_on_exceed": "throttle"
}
},
"alerts": {
"thresholds": [50, 75, 90, 100],
"notify_emails": ["admin@example.com"],
"notify_slack": true
},
"throttling": {
"enabled": true,
"soft_limit_percent": 90,
"hard_limit_percent": 100,
"fallback_tier": "T1"
}
}
Set Budget
PUT /api/v1/ai/costs/budget
Request Body
{
"daily_limit": 50.00,
"monthly_limit": 1000.00,
"alerts": {
"thresholds": [50, 75, 90, 100],
"notify_emails": ["admin@example.com"],
"notify_slack": true
},
"throttling": {
"enabled": true,
"soft_limit_percent": 90,
"hard_limit_percent": 100,
"fallback_tier": "T1"
}
}
Get Budget Forecast
GET /api/v1/ai/costs/budget/forecast
Response
{
"forecast": {
"end_of_month_projected": 825.00,
"budget_status": "on_track",
"confidence": 0.85
},
"trend": {
"daily_avg_last_7_days": 28.50,
"daily_avg_last_30_days": 22.00,
"growth_rate_percent": 8.5
},
"recommendations": [
{
"type": "optimization",
"message": "Consider using T1 for FAQ queries - potential savings $45/month",
"potential_savings": 45.00
}
]
}
Cost Optimization
Get Optimization Report
GET /api/v1/ai/costs/optimization
Response
{
"analysis_period": {
"start": "2026-01-01",
"end": "2026-01-24"
},
"current_efficiency": {
"tier_distribution_score": 85,
"cache_hit_rate": 42.5,
"avg_tokens_per_request": 456,
"unnecessary_upgrades": 125
},
"opportunities": [
{
"id": "opt_001",
"type": "tier_downgrade",
"description": "15% of T3 requests could be handled by T2",
"affected_requests": 1500,
"current_cost": 6.00,
"optimized_cost": 1.50,
"monthly_savings": 4.50,
"impact": "low"
},
{
"id": "opt_002",
"type": "caching",
"description": "Enable semantic caching for FAQ responses",
"cache_candidates": 8500,
"monthly_savings": 25.00,
"impact": "none"
},
{
"id": "opt_003",
"type": "prompt_optimization",
"description": "Reduce average prompt length by 20%",
"current_avg_tokens": 1250,
"optimized_avg_tokens": 1000,
"monthly_savings": 15.00,
"impact": "low"
}
],
"total_potential_savings": 44.50
}
Apply Optimization
POST /api/v1/ai/costs/optimization/{optimization_id}/apply
Token Analysis
Analyze Request
Analyze token usage for a request.
POST /api/v1/ai/costs/analyze
Request Body
{
"prompt": "What are today's specials?",
"system_prompt": "You are a helpful restaurant assistant...",
"expected_response_tokens": 200
}
Response
{
"analysis": {
"input_tokens": 125,
"estimated_output_tokens": 200,
"total_tokens": 325
},
"cost_by_tier": {
"T1": 0,
"T2": 0.000093,
"T3": 0.000663,
"T4": 0.001125,
"T5": 0.003375,
"T6": 0.005625
},
"recommended_tier": "T1",
"recommendation_reason": "Simple FAQ query - free tier sufficient"
}
Get Token Breakdown
GET /api/v1/ai/costs/tokens/{request_id}
Response
{
"request_id": "req_abc123",
"breakdown": {
"system_prompt_tokens": 450,
"user_prompt_tokens": 125,
"context_tokens": 850,
"output_tokens": 280,
"total_tokens": 1705
},
"tier_used": "T2",
"cost": 0.00028,
"cache_status": "miss"
}
Billing
Get Invoice Preview
GET /api/v1/ai/costs/billing/preview
Response
{
"period": {
"start": "2026-01-01",
"end": "2026-01-31"
},
"status": "in_progress",
"line_items": [
{
"description": "T1 (Free) - 85,000 requests",
"quantity": 85000,
"unit": "requests",
"rate": 0,
"amount": 0
},
{
"description": "T2 Input Tokens",
"quantity": 12000000,
"unit": "tokens",
"rate": 0.10,
"amount": 1.20
},
{
"description": "T2 Output Tokens",
"quantity": 2500000,
"unit": "tokens",
"rate": 0.40,
"amount": 1.00
}
],
"subtotal": 485.50,
"credits_applied": 0,
"total_due": 485.50,
"projected_total": 825.00
}
List Invoices
GET /api/v1/ai/costs/billing/invoices
Response
{
"data": [
{
"invoice_id": "inv_001",
"period": "2025-12",
"total": 750.00,
"status": "paid",
"paid_at": "2026-01-05T00:00:00Z",
"pdf_url": "https://..."
}
]
}
Alerts
List Cost Alerts
GET /api/v1/ai/costs/alerts
Response
{
"data": [
{
"id": "alert_001",
"type": "budget_threshold",
"severity": "warning",
"message": "Daily budget at 90% ($45.00 of $50.00)",
"threshold_percent": 90,
"current_value": 45.00,
"triggered_at": "2026-01-24T18:00:00Z",
"acknowledged": false
}
]
}
Acknowledge Alert
POST /api/v1/ai/costs/alerts/{alert_id}/acknowledge
Service Quotas
Get Quotas
GET /api/v1/ai/costs/quotas
Response
{
"quotas": {
"requests_per_minute": {
"limit": 1000,
"current": 45,
"percent_used": 4.5
},
"tokens_per_minute": {
"limit": 500000,
"current": 25000,
"percent_used": 5
},
"concurrent_requests": {
"limit": 100,
"current": 12,
"percent_used": 12
}
},
"tier_quotas": {
"T5": {
"daily_requests": 5000,
"used": 1200
},
"T6": {
"daily_requests": 1000,
"used": 300
}
}
}
Webhooks
| Event | Description |
|---|---|
| ai_costs.budget_threshold | Budget threshold reached |
| ai_costs.budget_exceeded | Budget exceeded |
| ai_costs.throttling_started | Throttling activated |
| ai_costs.anomaly_detected | Unusual usage pattern |
| ai_costs.invoice_ready | Monthly invoice ready |
Error Responses
| Status | Code | Description |
|---|---|---|
| 400 | invalid_date_range | Date range invalid |
| 403 | budget_exceeded | Budget limit reached |
| 404 | tenant_not_found | Tenant ID not found |
| 429 | quota_exceeded | Rate quota exceeded |
Related Documentation
- ACP Router Guide - Model routing
- AI Gateway - AI gateway API
- ACP Foundation - AI platform foundation