Overview
Proper error handling is crucial for building reliable AI agents. This guide covers the error scenarios you’ll encounter with AgentWarden and how to handle them gracefully.
Exception Types
The AgentWarden SDK raises specific exceptions for different error conditions:
from agentwarden import (
AgentWarden,
AgentWardenError, # Base exception
AuthenticationError, # Invalid API key
RateLimitError, # Too many requests
NetworkError, # Connection issues
ValidationError # Invalid parameters
)
Authentication Errors
Invalid API Key
Cause: The API key is incorrect or expired, or the organization is inactive.
from agentwarden import AgentWarden, AuthenticationError
# Build a client with a bad key and run a single permission check.
# NOTE: the SDK import comment lists AuthenticationError as "Invalid API key".
try:
    guard = AgentWarden(api_key="invalid_key")
    result = guard.check("agent-id", "action")
except AuthenticationError as e:
    # Action: check the API key in the dashboard and verify it is correct.
    # Notify an admin if this happens in production.
    print(f"Authentication failed: {e}")
Copy
{
"detail": "Invalid API key"
}
- Verify API key is correct (copy from dashboard)
- Check environment variables are loaded
- Ensure organization is active (not suspended for payment)
- Regenerate API key if compromised
Organization Inactive
Copy
try:
    result = guard.check("agent-id", "action")
except AuthenticationError as e:
    # The match is case-insensitive: the message is lower-cased first.
    org_inactive = "not active" in str(e).lower()
    if org_inactive:
        # Organization suspended or deleted
        notify_admin("Organization suspended - check billing")
- Free trial expired
- Payment failed
- Organization deleted
- Account suspended
Rate Limit Errors
Exceeding Request Limits
Cause: Too many API requests in a short time.
from agentwarden import RateLimitError
import time
def check_with_retry(agent_id, action, context, max_retries=3):
    """Check a permission, retrying with exponential backoff on rate limits.

    Args:
        agent_id: Agent identifier forwarded to ``guard.check``.
        action: Action name forwarded to ``guard.check``.
        context: Context payload forwarded to ``guard.check``.
        max_retries: Total number of attempts before giving up.

    Returns:
        Whatever ``guard.check`` returns, or ``None`` when ``max_retries``
        is less than 1 (the loop body never runs).

    Raises:
        RateLimitError: Re-raised when the final attempt is rate limited too.
    """
    final_attempt = max_retries - 1
    for attempt in range(max_retries):
        try:
            return guard.check(agent_id, action, context)
        except RateLimitError:
            if attempt >= final_attempt:
                # Final attempt failed: surface the error to the caller.
                print("Rate limit exceeded after retries")
                raise
            # Exponential backoff: 1s, 2s, 4s
            wait_time = 2 ** attempt
            print(f"Rate limited. Retrying in {wait_time}s...")
            time.sleep(wait_time)
    return None
# Usage
# Bind the arguments to names so the except branch can refer to the action
# that failed (the bare name `action` would otherwise be undefined here).
agent_id, action = "agent-id", "action"
try:
    result = check_with_retry(agent_id, action, {"amount": 50})
except RateLimitError:
    # Still rate limited after retries:
    # queue the action for later or notify the user.
    queue_for_later(action)
Copy
X-RateLimit-Limit: 1000
X-RateLimit-Remaining: 0
X-RateLimit-Reset: 1640000060
Retry-After: 60
- Implement exponential backoff
- Cache permission checks when appropriate
- Batch operations instead of individual calls
- Monitor rate limit headers
- Upgrade plan if consistently hitting limits
Reading Rate Limit Headers
Copy
def check_with_rate_limit_monitoring(agent_id, action, context):
    """Check a permission over HTTP and warn when the rate-limit budget is low.

    Posts the check to the SDK endpoint, then reads the
    ``X-RateLimit-Remaining`` / ``X-RateLimit-Limit`` response headers and
    calls ``send_alert`` when fewer than 10% of requests remain.

    Args:
        agent_id: Agent identifier sent in the request body.
        action: Action name sent in the request body.
        context: Context payload sent in the request body.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.RequestException: On connection failure or timeout.
    """
    import requests

    response = requests.post(
        'https://api.agentwarden.io/sdk/check',
        headers={'X-API-Key': api_key},
        json={'agent_id': agent_id, 'action': action, 'context': context},
        # Without a timeout a stalled connection would block the agent forever.
        timeout=10,
    )

    # Only evaluate the budget when both headers are present and numeric;
    # defaulting a missing header to 0 would raise a false "0 remaining" alarm.
    raw_remaining = response.headers.get('X-RateLimit-Remaining')
    raw_limit = response.headers.get('X-RateLimit-Limit')
    if raw_remaining is not None and raw_limit is not None:
        try:
            remaining, limit = int(raw_remaining), int(raw_limit)
        except ValueError:
            # Malformed header: skip monitoring rather than lose the response.
            remaining = limit = None
        if limit is not None and remaining < limit * 0.1:  # Less than 10% remaining
            print(f"⚠️ Warning: Only {remaining}/{limit} requests remaining")
            # Alert monitoring system
            send_alert("Low rate limit remaining")

    return response.json()
Network Errors
Connection Failures
Cause: Network issues, API downtime, or DNS problems.
from agentwarden import NetworkError
import logging
logger = logging.getLogger(__name__)
def check_with_network_retry(agent_id, action, context, max_retries=3):
    """Check a permission, retrying on network errors and denying on failure.

    Args:
        agent_id: Agent identifier forwarded to ``guard.check``.
        action: Action name forwarded to ``guard.check``.
        context: Context payload forwarded to ``guard.check``.
        max_retries: Total number of attempts before falling back to a denial.

    Returns:
        The result of ``guard.check`` on success; otherwise a ``CheckResponse``
        with ``allowed=False`` once every attempt has failed.
    """
    # NOTE(review): CheckResponse is not imported anywhere in the visible
    # source - confirm where it is exported from before using this snippet.
    last_attempt = max_retries - 1
    for attempt in range(max_retries):
        try:
            return guard.check(agent_id, action, context)
        except NetworkError as e:
            logger.error(f"Network error on attempt {attempt + 1}: {e}")
            if attempt < last_attempt:
                time.sleep(2 ** attempt)  # Exponential backoff
                continue
            # Network is persistently down.
            # Fail safely - deny action by default for safety.
            logger.critical("AgentWarden API unreachable after retries")
            # Option 1: Deny all actions (safest)
            denial = CheckResponse(
                allowed=False,
                requires_approval=False,
                reason="Permission system unavailable - denied for safety",
            )
            return denial
            # Option 2: Allow low-risk actions, deny high-risk
            # (only if you have local fallback logic)
- Deny by Default (Recommended)
- Local Fallback
- Allow Low-Risk Only
Copy
# If AgentWarden is unreachable, deny all actions
try:
result = guard.check(agent_id, action, context)
except NetworkError:
# Deny for safety
return {"allowed": False, "reason": "System unavailable"}
Copy
# Use local permission cache (with short TTL)
try:
result = guard.check(agent_id, action, context)
except NetworkError:
# Check local cache (if recently fetched)
cached = get_cached_permission(agent_id, action)
if cached and not_expired(cached):
return cached
else:
# No valid cache - deny
return {"allowed": False}
Copy
# Allow only pre-approved low-risk actions
LOW_RISK_ACTIONS = ["email.send", "notification.create"]
try:
result = guard.check(agent_id, action, context)
except NetworkError:
if action in LOW_RISK_ACTIONS:
return {"allowed": True} # Allow low-risk
else:
return {"allowed": False} # Deny high-risk
Validation Errors
Missing Required Fields
Copy
from agentwarden import ValidationError
# Missing required 'action' field
check_args = {
    "agent_id": "bot-123",
    "action": None,  # Invalid!
    "context": {},
}
try:
    result = guard.check(**check_args)
except ValidationError as e:
    # Fix: ensure all required fields are provided.
    print(f"Invalid parameters: {e}")
Invalid Status Values
Copy
# Invalid status value
log_args = {
    "agent_id": "bot-123",
    "action": "test",
    "status": "complete",  # Should be: success, failed, pending, denied
    "context": {},
}
try:
    guard.log(**log_args)
except ValidationError as e:
    # Fix: use valid status values.
    print(f"Invalid status: {e}")
- "success" - Action completed successfully
- "failed" - Action failed
- "pending" - Waiting (e.g., for approval)
- "denied" - Permission denied
Permission Denied Scenarios
No Permission Exists
Copy
result = guard.check("agent-id", "unknown.action", {})
# Neither allowed nor routed to approval means an outright denial.
if not (result.allowed or result.requires_approval):
    print(f"Permission denied: {result.reason}")
    # Reason: "No permission found for action 'unknown.action'"
    # Actions:
    # 1. Check action name spelling
    # 2. Create permission in dashboard
    # 3. Notify user permission is missing
Amount Exceeds Limit
Copy
result = guard.check(
agent_id="support-bot",
action="stripe.refund",
context={"amount": 500.00} # Exceeds max of $100
)
if not result.allowed:
# Reason: "Amount 500.00 exceeds maximum allowed 100.00"
# Handle gracefully
if "exceeds maximum" in result.reason:
# Automatically escalate to human
notify_manager(amount=500, reason=result.reason)
return "Refund request forwarded to manager"
Agent Inactive
Copy
result = guard.check("inactive-agent-id", "action", {})
# Normalise a missing reason to "" so the substring test cannot raise.
if not result.allowed and "not active" in (result.reason or ""):
    # Reason: "Agent is not active (status: inactive)"
    # Actions:
    # 1. Check if agent was intentionally deactivated
    # 2. Reactivate in dashboard if needed
    # 3. Use different agent if this one is deprecated
    # A comment-only `if` body is a syntax error, so report the denial here.
    print(f"Agent inactive: {result.reason}")
Plan Limit Errors
Agent Limit Exceeded
Copy
# `requests` is needed at module scope for this snippet.
import requests

try:
    # Try to create agent
    response = requests.post(
        'https://api.agentwarden.io/api/agents',
        headers={'Authorization': f'Bearer {jwt_token}'},
        json={'name': 'new-agent', 'description': 'Test'},
        # Bound the wait so a stalled connection cannot hang the caller.
        timeout=10,
    )
    if response.status_code == 403:
        error = response.json()['detail']
        # 'detail' can be a plain string (e.g. "Invalid API key"), so only
        # treat it as a structured plan-limit error when it is a dict.
        if isinstance(error, dict) and error.get('error') == 'plan_limit_exceeded':
            print(f"❌ {error['message']}")
            print(f"Current plan: {error['current_plan']}")
            print(f"Upgrade at: {error['upgrade_url']}")
            # Notify user to upgrade
            notify_user_to_upgrade(error['message'])
except Exception as e:
    print(f"Error creating agent: {e}")
Log Limit Exceeded
Copy
try:
    guard.log(agent_id, action, "success", context)
except Exception as e:
    # The limit is detected from the message text, case-insensitively.
    message = str(e).lower()
    if "log limit" in message:
        # Monthly log limit reached
        print("⚠️ Monthly log limit reached - upgrade plan")
        # Options:
        # 1. Queue logs locally and batch upload later
        # 2. Only log critical actions
        # 3. Notify admin to upgrade
Comprehensive Error Handler
Here’s a production-ready error handler that covers all scenarios:
from agentwarden import (
AgentWarden, AgentWardenError, AuthenticationError,
RateLimitError, NetworkError, ValidationError
)
import logging
import time
from typing import Optional
logger = logging.getLogger(__name__)
class SafeAgentGuard:
    """AgentWarden wrapper that converts SDK errors into decision dicts.

    ``check`` never raises: every outcome, including failures, is returned as
    a dict with the keys ``allowed``, ``requires_approval``, ``reason`` and
    ``approval_id``.
    """

    def __init__(self, api_key: str, fail_safe: str = "deny"):
        """Create the underlying client and store the fail-safe policy.

        Args:
            api_key: AgentWarden API key.
            fail_safe: What to do if AgentWarden is unreachable:
                'deny' (default) - deny all actions
                'allow_low_risk' - allow predefined low-risk actions
                Any other value behaves like 'deny'.
        """
        self.guard = AgentWarden(api_key=api_key)
        self.fail_safe = fail_safe
        self.low_risk_actions = ["email.send", "notification.create"]

    @staticmethod
    def _decision(allowed: bool, reason: str, requires_approval: bool = False,
                  approval_id=None) -> dict:
        """Build a decision dict that always carries all four documented keys."""
        return {
            "allowed": allowed,
            "requires_approval": requires_approval,
            "reason": reason,
            "approval_id": approval_id,
        }

    def check(self, agent_id: str, action: str, context: Optional[dict] = None,
              max_retries: int = 3) -> dict:
        """Check a permission with comprehensive error handling.

        Args:
            agent_id: Agent identifier; must be non-empty.
            action: Action name; must be non-empty.
            context: Optional context payload; ``None`` is sent as ``{}``.
            max_retries: Total attempts for rate-limit and network errors.
                Values below 1 are treated as 1 so the API is always
                contacted at least once.

        Returns:
            dict with keys: allowed, requires_approval, reason, approval_id
        """
        # Validate inputs
        if not agent_id or not action:
            logger.error("Missing required parameters")
            return self._decision(
                False, "Invalid request - missing agent_id or action"
            )

        attempts = max(1, max_retries)
        for attempt in range(attempts):
            can_retry = attempt < attempts - 1
            try:
                result = self.guard.check(agent_id, action, context or {})
                # Success. The attribute reads stay inside the try so that a
                # malformed result falls through to the fail-safe branch.
                return self._decision(
                    result.allowed,
                    result.reason,
                    result.requires_approval,
                    getattr(result, 'approval_id', None),
                )
            except AuthenticationError as e:
                # Don't retry auth errors
                logger.critical("Authentication failed: %s", e)
                return self._decision(
                    False, "Authentication failed - check API key"
                )
            except RateLimitError:
                if not can_retry:
                    logger.error("Rate limit exceeded after retries")
                    return self._decision(
                        False, "Rate limit exceeded - try again later"
                    )
                wait = 2 ** attempt
                logger.warning("Rate limited, retrying in %ss", wait)
                time.sleep(wait)
            except NetworkError as e:
                if not can_retry:
                    logger.critical("Network error after retries: %s", e)
                    # Apply fail-safe strategy
                    return self._fail_safe_decision(action)
                wait = 2 ** attempt
                logger.warning("Network error, retrying in %ss: %s", wait, e)
                time.sleep(wait)
            except ValidationError as e:
                # Don't retry validation errors
                logger.error("Validation error: %s", e)
                return self._decision(False, f"Invalid request: {e}")
            except Exception as e:
                # logger.exception keeps the traceback for unexpected failures.
                logger.exception("Unexpected error: %s", e)
                return self._fail_safe_decision(action)

        # Shouldn't reach here
        return self._fail_safe_decision(action)

    def _fail_safe_decision(self, action: str) -> dict:
        """Make a safe decision when AgentWarden is unavailable.

        With ``fail_safe == "allow_low_risk"`` the action is allowed only if
        it is listed in ``self.low_risk_actions``; otherwise it is denied.
        """
        if self.fail_safe == "allow_low_risk":
            allowed = action in self.low_risk_actions
            if allowed:
                reason = "Low-risk action allowed (system unavailable)"
            else:
                reason = "High-risk action denied (system unavailable)"
        else:
            # deny by default
            allowed = False
            reason = "Action denied for safety (system unavailable)"
        return self._decision(allowed, reason)

    def log(self, agent_id: str, action: str, status: str,
            context: Optional[dict] = None) -> bool:
        """Log an action, swallowing any failure.

        Returns:
            bool: True if logged successfully, False otherwise
        """
        try:
            self.guard.log(agent_id, action, status, context or {})
            return True
        except Exception as e:
            logger.error("Failed to log action: %s", e)
            # Log failures are non-critical - don't block execution
            return False
# Usage
import os  # needed for os.getenv below

safe_guard = SafeAgentGuard(
    api_key=os.getenv('AGENTWARDEN_API_KEY'),
    fail_safe='deny',  # or 'allow_low_risk'
)

# All errors are handled gracefully: check() returns a decision dict
# instead of raising.
result = safe_guard.check("agent-id", "action", {"amount": 50})
if result['allowed']:
    execute_action()
    safe_guard.log("agent-id", "action", "success")
elif result['requires_approval']:
    queue_for_approval(result['approval_id'])
else:
    notify_user(result['reason'])
Monitoring and Alerting
Track Error Rates
Copy
import sentry_sdk
from prometheus_client import Counter
# Prometheus metrics
permission_check_errors = Counter(
    'agentwarden_errors_total',
    'Total errors from AgentWarden',
    ['error_type'],
)


def check_with_monitoring(agent_id, action, context):
    """Run ``guard.check`` and record SDK errors before re-raising them.

    Each handled error increments ``permission_check_errors`` under its
    ``error_type`` label. Authentication and network errors are also sent to
    Sentry; rate-limit errors are only counted. The exception always
    propagates to the caller.
    """
    try:
        return guard.check(agent_id, action, context)
    except (AuthenticationError, RateLimitError, NetworkError) as exc:
        # Classify in the same precedence order as separate except clauses.
        if isinstance(exc, AuthenticationError):
            error_type, report_to_sentry = 'auth', True
        elif isinstance(exc, RateLimitError):
            error_type, report_to_sentry = 'rate_limit', False
        else:
            error_type, report_to_sentry = 'network', True
        permission_check_errors.labels(error_type=error_type).inc()
        if report_to_sentry:
            sentry_sdk.capture_exception()
        raise
Alert on Critical Errors
Copy
def send_alert(message: str, severity: str = "warning"):
    """Send alert to monitoring system.

    Args:
        message: Text of the alert.
        severity: ``"critical"`` pages the on-call engineer; any other value
            is written to the log as a warning.
    """
    # Slack, PagerDuty, etc.
    if severity != "critical":
        # Log to monitoring
        logger.warning(message)
        return
    # Page on-call
    page_oncall(message)
# Usage
try:
    result = guard.check(agent_id, action, context)
except (AuthenticationError, NetworkError) as exc:
    # Both failure modes are critical; only the message differs.
    if isinstance(exc, AuthenticationError):
        alert_text = "AgentWarden authentication failed"
    else:
        alert_text = "AgentWarden API unreachable"
    send_alert(alert_text, severity="critical")
Testing Error Scenarios
Copy
import pytest
from unittest.mock import Mock, patch
def test_authentication_error():
    """Authentication errors are denied immediately, without retrying."""
    # Patch the client held by the wrapper. Patching 'agentwarden.AgentWarden'
    # here has no effect: safe_guard already created its client, so the mock
    # would never be used.
    with patch.object(safe_guard, 'guard') as mock_guard:
        mock_guard.check.side_effect = AuthenticationError("Invalid API key")
        result = safe_guard.check("agent-id", "action")

    assert result['allowed'] is False
    assert "Authentication failed" in result['reason']
    # Auth errors must not be retried.
    assert mock_guard.check.call_count == 1
def test_rate_limit_retry():
    """Rate-limit errors are retried with exponential backoff."""
    # Patch the client held by the wrapper (patching the AgentWarden class
    # after safe_guard exists has no effect), and stub time.sleep so the test
    # does not really wait through the 1s + 2s backoff.
    with patch.object(safe_guard, 'guard') as mock_guard:
        with patch('time.sleep') as mock_sleep:
            # Fail twice, succeed third time
            mock_guard.check.side_effect = [
                RateLimitError("Too many requests"),
                RateLimitError("Too many requests"),
                Mock(allowed=True, requires_approval=False),
            ]
            result = safe_guard.check("agent-id", "action")

    assert result['allowed'] is True
    assert mock_guard.check.call_count == 3
    # Backoff doubles between attempts: 1s, then 2s.
    assert [c[0][0] for c in mock_sleep.call_args_list] == [1, 2]
Quick Reference
| Error | Retry? | Fail-Safe Action |
|---|---|---|
| AuthenticationError | ❌ No | Deny all |
| RateLimitError | ✅ Yes (backoff) | Deny after retries |
| NetworkError | ✅ Yes (backoff) | Configurable (deny/allow low-risk) |
| ValidationError | ❌ No | Deny, fix code |
| PermissionDenied | ❌ No | Handle gracefully |
| PlanLimitExceeded | ❌ No | Notify to upgrade |