Skip to main content
Authenticated API

These endpoints require a valid JWT Bearer token and are accessible via the API gateway at /v1/ai/*.

AI Cost Management API

Track AI model usage, manage costs, set budgets, and optimize spending across all AI services.

Overview

| Attribute | Value |
| --- | --- |
| Base Path | /api/v1/ai/costs |
| Authentication | Bearer Token |
| Required Roles | analytics_admin, platform_admin, system_admin, super_admin, tenant_admin |

Model Tiers & Pricing

Get Model Pricing

Retrieve current AI model pricing tiers.

GET /api/v1/ai/costs/pricing

Response

{
"pricing_version": "2026-01",
"tiers": [
{
"tier": "T1",
"name": "Free Tier",
"model": "llama-4-scout",
"provider": "workers_ai",
"input_cost_per_million": 0,
"output_cost_per_million": 0,
"use_cases": ["simple_queries", "greetings", "faq"]
},
{
"tier": "T2",
"name": "Fast Tier",
"model": "gemini-2.0-flash",
"provider": "google",
"input_cost_per_million": 0.10,
"output_cost_per_million": 0.40,
"use_cases": ["real_time_conversation", "quick_analysis"]
},
{
"tier": "T3",
"name": "Standard Tier",
"model": "gemini-3-flash",
"provider": "google",
"input_cost_per_million": 0.50,
"output_cost_per_million": 3.00,
"use_cases": ["complex_conversation", "detailed_analysis"]
},
{
"tier": "T4",
"name": "Reasoning Tier",
"model": "claude-haiku-4.5",
"provider": "anthropic",
"input_cost_per_million": 1.00,
"output_cost_per_million": 5.00,
"use_cases": ["fast_reasoning", "multi_step_tasks"]
},
{
"tier": "T5",
"name": "Quality Tier",
"model": "claude-sonnet-4.5",
"provider": "anthropic",
"input_cost_per_million": 3.00,
"output_cost_per_million": 15.00,
"use_cases": ["high_quality_analysis", "complex_reasoning"]
},
{
"tier": "T6",
"name": "Premium Tier",
"model": "claude-opus-4.5",
"provider": "anthropic",
"input_cost_per_million": 5.00,
"output_cost_per_million": 25.00,
"use_cases": ["strategic_planning", "expert_analysis"]
}
],
"effective_date": "2026-01-01"
}

Usage Tracking

Get Usage Summary

Retrieve AI usage summary for a tenant.

GET /api/v1/ai/costs/usage

Query Parameters

| Parameter | Type | Description |
| --- | --- | --- |
| tenant_id | uuid | Tenant ID |
| start_date | date | Period start |
| end_date | date | Period end |
| granularity | string | hour, day, week, month |

Response

{
"tenant_id": "tenant_123",
"period": {
"start": "2026-01-01",
"end": "2026-01-24"
},
"summary": {
"total_requests": 125000,
"total_input_tokens": 45000000,
"total_output_tokens": 12000000,
"total_cost": 485.50,
"avg_cost_per_request": 0.0039
},
"by_tier": [
{
"tier": "T1",
"requests": 85000,
"input_tokens": 25000000,
"output_tokens": 8000000,
"cost": 0
},
{
"tier": "T2",
"requests": 25000,
"input_tokens": 12000000,
"output_tokens": 2500000,
"cost": 2.20
},
{
"tier": "T3",
"requests": 10000,
"input_tokens": 5000000,
"output_tokens": 1000000,
"cost": 5.50
},
{
"tier": "T4",
"requests": 3500,
"input_tokens": 2000000,
"output_tokens": 350000,
"cost": 3.75
},
{
"tier": "T5",
"requests": 1200,
"input_tokens": 800000,
"output_tokens": 120000,
"cost": 4.20
},
{
"tier": "T6",
"requests": 300,
"input_tokens": 200000,
"output_tokens": 30000,
"cost": 1.75
}
],
"by_service": [
{
"service": "voice_ai",
"requests": 45000,
"cost": 125.00
},
{
"service": "drive_thru",
"requests": 35000,
"cost": 180.00
},
{
"service": "chat_assistant",
"requests": 25000,
"cost": 95.50
},
{
"service": "analytics",
"requests": 20000,
"cost": 85.00
}
]
}

Get Usage by Day

GET /api/v1/ai/costs/usage/daily

Response

{
"data": [
{
"date": "2026-01-24",
"requests": 5200,
"input_tokens": 1850000,
"output_tokens": 520000,
"cost": 22.50,
"by_tier": {
"T1": 3500,
"T2": 1200,
"T3": 350,
"T4": 120,
"T5": 25,
"T6": 5
}
}
]
}

Get Real-time Usage

GET /api/v1/ai/costs/usage/realtime

Response

{
"timestamp": "2026-01-24T19:30:00Z",
"current_hour": {
"requests": 450,
"cost": 2.15,
"trend": "normal"
},
"today": {
"requests": 5200,
"cost": 22.50,
"budget_percent_used": 45
},
"this_month": {
"requests": 125000,
"cost": 485.50,
"budget_percent_used": 48.5
},
"alerts": []
}

Budget Management

Get Budget

Retrieve budget configuration.

GET /api/v1/ai/costs/budget

Response

{
"tenant_id": "tenant_123",
"budgets": {
"daily": {
"limit": 50.00,
"current": 22.50,
"percent_used": 45,
"action_on_exceed": "alert"
},
"monthly": {
"limit": 1000.00,
"current": 485.50,
"percent_used": 48.5,
"action_on_exceed": "throttle"
}
},
"alerts": {
"thresholds": [50, 75, 90, 100],
"notify_emails": ["admin@example.com"],
"notify_slack": true
},
"throttling": {
"enabled": true,
"soft_limit_percent": 90,
"hard_limit_percent": 100,
"fallback_tier": "T1"
}
}

Set Budget

PUT /api/v1/ai/costs/budget

Request Body

{
"daily_limit": 50.00,
"monthly_limit": 1000.00,
"alerts": {
"thresholds": [50, 75, 90, 100],
"notify_emails": ["admin@example.com"],
"notify_slack": true
},
"throttling": {
"enabled": true,
"soft_limit_percent": 90,
"hard_limit_percent": 100,
"fallback_tier": "T1"
}
}

Get Budget Forecast

GET /api/v1/ai/costs/budget/forecast

Response

{
"forecast": {
"end_of_month_projected": 825.00,
"budget_status": "on_track",
"confidence": 0.85
},
"trend": {
"daily_avg_last_7_days": 28.50,
"daily_avg_last_30_days": 22.00,
"growth_rate_percent": 8.5
},
"recommendations": [
{
"type": "optimization",
"message": "Consider using T1 for FAQ queries - potential savings $45/month",
"potential_savings": 45.00
}
]
}

Cost Optimization

Get Optimization Report

GET /api/v1/ai/costs/optimization

Response

{
"analysis_period": {
"start": "2026-01-01",
"end": "2026-01-24"
},
"current_efficiency": {
"tier_distribution_score": 85,
"cache_hit_rate": 42.5,
"avg_tokens_per_request": 456,
"unnecessary_upgrades": 125
},
"opportunities": [
{
"id": "opt_001",
"type": "tier_downgrade",
"description": "15% of T3 requests could be handled by T2",
"affected_requests": 1500,
"current_cost": 6.00,
"optimized_cost": 1.50,
"monthly_savings": 4.50,
"impact": "low"
},
{
"id": "opt_002",
"type": "caching",
"description": "Enable semantic caching for FAQ responses",
"cache_candidates": 8500,
"monthly_savings": 25.00,
"impact": "none"
},
{
"id": "opt_003",
"type": "prompt_optimization",
"description": "Reduce average prompt length by 20%",
"current_avg_tokens": 1250,
"optimized_avg_tokens": 1000,
"monthly_savings": 15.00,
"impact": "low"
}
],
"total_potential_savings": 44.50
}

Apply Optimization

POST /api/v1/ai/costs/optimization/{optimization_id}/apply

Token Analysis

Analyze Request

Analyze token usage for a request.

POST /api/v1/ai/costs/analyze

Request Body

{
"prompt": "What are today's specials?",
"system_prompt": "You are a helpful restaurant assistant...",
"expected_response_tokens": 200
}

Response

{
"analysis": {
"input_tokens": 125,
"estimated_output_tokens": 200,
"total_tokens": 325
},
"cost_by_tier": {
"T1": 0,
"T2": 0.000093,
"T3": 0.000663,
"T4": 0.001125,
"T5": 0.003375,
"T6": 0.005625
},
"recommended_tier": "T1",
"recommendation_reason": "Simple FAQ query - free tier sufficient"
}

Get Token Breakdown

GET /api/v1/ai/costs/tokens/{request_id}

Response

{
"request_id": "req_abc123",
"breakdown": {
"system_prompt_tokens": 450,
"user_prompt_tokens": 125,
"context_tokens": 850,
"output_tokens": 280,
"total_tokens": 1705
},
"tier_used": "T2",
"cost": 0.00025,
"cache_status": "miss"
}

Billing

Get Invoice Preview

GET /api/v1/ai/costs/billing/preview

Response

{
"period": {
"start": "2026-01-01",
"end": "2026-01-31"
},
"status": "in_progress",
"line_items": [
{
"description": "T1 (Free) - 85,000 requests",
"quantity": 85000,
"unit": "requests",
"rate": 0,
"amount": 0
},
{
"description": "T2 Input Tokens",
"quantity": 12000000,
"unit": "tokens",
"rate": 0.10,
"amount": 1.20
},
{
"description": "T2 Output Tokens",
"quantity": 2500000,
"unit": "tokens",
"rate": 0.40,
"amount": 1.00
}
],
"subtotal": 485.50,
"credits_applied": 0,
"total_due": 485.50,
"projected_total": 825.00
}

List Invoices

GET /api/v1/ai/costs/billing/invoices

Response

{
"data": [
{
"invoice_id": "inv_001",
"period": "2025-12",
"total": 750.00,
"status": "paid",
"paid_at": "2026-01-05T00:00:00Z",
"pdf_url": "https://..."
}
]
}

Alerts

List Cost Alerts

GET /api/v1/ai/costs/alerts

Response

{
"data": [
{
"id": "alert_001",
"type": "budget_threshold",
"severity": "warning",
"message": "Daily budget at 90% ($45.00 of $50.00)",
"threshold_percent": 90,
"current_value": 45.00,
"triggered_at": "2026-01-24T18:00:00Z",
"acknowledged": false
}
]
}

Acknowledge Alert

POST /api/v1/ai/costs/alerts/{alert_id}/acknowledge

Service Quotas

Get Quotas

GET /api/v1/ai/costs/quotas

Response

{
"quotas": {
"requests_per_minute": {
"limit": 1000,
"current": 45,
"percent_used": 4.5
},
"tokens_per_minute": {
"limit": 500000,
"current": 25000,
"percent_used": 5
},
"concurrent_requests": {
"limit": 100,
"current": 12,
"percent_used": 12
}
},
"tier_quotas": {
"T5": {
"daily_requests": 5000,
"used": 1200
},
"T6": {
"daily_requests": 1000,
"used": 300
}
}
}

Webhooks

| Event | Description |
| --- | --- |
| ai_costs.budget_threshold | Budget threshold reached |
| ai_costs.budget_exceeded | Budget exceeded |
| ai_costs.throttling_started | Throttling activated |
| ai_costs.anomaly_detected | Unusual usage pattern |
| ai_costs.invoice_ready | Monthly invoice ready |

Error Responses

| Status | Code | Description |
| --- | --- | --- |
| 400 | invalid_date_range | Date range invalid |
| 403 | budget_exceeded | Budget limit reached |
| 404 | tenant_not_found | Tenant ID not found |
| 429 | quota_exceeded | Rate quota exceeded |