{
  "source": "https://llmcalc.app",
  "docs": "https://llmcalc.app/about#data",
  "license": "Free to use with attribution. Link to https://llmcalc.app where practical.",
  "schema_version": 3,
  "last_updated": "2026-06-12",
  "notes": "All prices are in USD per 1M tokens unless the field name or a note states otherwise (for example web_search_per_1000 and storage_per_1m_per_hour). confidence \"official\" means verified against the provider's published pricing on last_verified. confidence \"delisted\" means the provider removed the model from its public price list; the last verified rates are retained for reference.",
  "models": [
    {
      "slug": "claude-fable-5",
      "name": "Claude Fable 5",
      "provider": "anthropic",
      "confidence": "official",
      "last_verified": "2026-06-12",
      "context_window": 1000000,
      "max_output": 128000,
      "release_date": "2026-06-09",
      "pricing": {
        "standard": {
          "input_per_1m": 10,
          "output_per_1m": 50
        },
        "cache": {
          "type": "explicit",
          "read_per_1m": 1,
          "write_5m_per_1m": 12.5,
          "write_1h_per_1m": 20,
          "minimum_cache_tokens": 1024,
          "ttl_options": [
            "5m",
            "1h"
          ]
        },
        "batch": {
          "input_discount_percent": 50,
          "output_discount_percent": 50,
          "stacks_with_cache": "yes"
        },
        "long_context": {
          "has_separate_tier": false,
          "note": "1M context window included at standard pricing on the Claude API."
        },
        "extras": {
          "reasoning_tokens_billed_as": "output",
          "web_search_per_1000": 10,
          "note": "Adaptive thinking is always on. Claude Mythos 5 shares the same pricing in limited availability (Project Glasswing). Uses the tokenizer introduced with Opus 4.7: the same text produces roughly 30% more tokens than older Claude models, so per-token estimates based on older tokenizers undercount."
        }
      },
      "official_docs_url": "https://platform.claude.com/docs/en/docs/about-claude/pricing"
    },
    {
      "slug": "claude-opus-4-8",
      "name": "Claude Opus 4.8",
      "provider": "anthropic",
      "confidence": "official",
      "last_verified": "2026-06-12",
      "context_window": 1000000,
      "max_output": 128000,
      "release_date": "2026-05-28",
      "pricing": {
        "standard": {
          "input_per_1m": 5,
          "output_per_1m": 25
        },
        "cache": {
          "type": "explicit",
          "read_per_1m": 0.5,
          "write_5m_per_1m": 6.25,
          "write_1h_per_1m": 10,
          "minimum_cache_tokens": 1024,
          "ttl_options": [
            "5m",
            "1h"
          ]
        },
        "batch": {
          "input_discount_percent": 50,
          "output_discount_percent": 50,
          "stacks_with_cache": "yes"
        },
        "long_context": {
          "has_separate_tier": false,
          "note": "1M context window included at standard pricing on the Claude API."
        },
        "extras": {
          "reasoning_tokens_billed_as": "output",
          "web_search_per_1000": 10,
          "note": "Fast mode (research preview) runs this model at $10/$50 per 1M with faster output. Uses the tokenizer introduced with Opus 4.7: the same text produces roughly 30% more tokens than older Claude models, so per-token estimates based on older tokenizers undercount."
        }
      },
      "official_docs_url": "https://platform.claude.com/docs/en/docs/about-claude/pricing"
    },
    {
      "slug": "claude-opus-4-7",
      "name": "Claude Opus 4.7",
      "provider": "anthropic",
      "confidence": "official",
      "last_verified": "2026-06-12",
      "context_window": 1000000,
      "max_output": 128000,
      "release_date": "2026-03-01",
      "pricing": {
        "standard": {
          "input_per_1m": 5,
          "output_per_1m": 25
        },
        "cache": {
          "type": "explicit",
          "read_per_1m": 0.5,
          "write_5m_per_1m": 6.25,
          "write_1h_per_1m": 10,
          "minimum_cache_tokens": 1024,
          "ttl_options": [
            "5m",
            "1h"
          ]
        },
        "batch": {
          "input_discount_percent": 50,
          "output_discount_percent": 50,
          "stacks_with_cache": "yes"
        },
        "long_context": {
          "has_separate_tier": false,
          "note": "1M context window included at standard pricing on the Claude API."
        },
        "extras": {
          "reasoning_tokens_billed_as": "output",
          "web_search_per_1000": 10,
          "note": "Uses the tokenizer introduced with Opus 4.7: the same text produces roughly 30% more tokens than older Claude models, so per-token estimates based on older tokenizers undercount."
        }
      },
      "official_docs_url": "https://platform.claude.com/docs/en/docs/about-claude/pricing"
    },
    {
      "slug": "claude-sonnet-4-6",
      "name": "Claude Sonnet 4.6",
      "provider": "anthropic",
      "confidence": "official",
      "last_verified": "2026-06-12",
      "context_window": 1000000,
      "max_output": 64000,
      "release_date": "2026-01-15",
      "pricing": {
        "standard": {
          "input_per_1m": 3,
          "output_per_1m": 15
        },
        "cache": {
          "type": "explicit",
          "read_per_1m": 0.3,
          "write_5m_per_1m": 3.75,
          "write_1h_per_1m": 6,
          "minimum_cache_tokens": 1024,
          "ttl_options": [
            "5m",
            "1h"
          ]
        },
        "batch": {
          "input_discount_percent": 50,
          "output_discount_percent": 50,
          "stacks_with_cache": "yes"
        },
        "long_context": {
          "has_separate_tier": false,
          "note": "1M context window included at standard pricing on the Claude API."
        },
        "extras": {
          "reasoning_tokens_billed_as": "output",
          "web_search_per_1000": 10
        }
      },
      "official_docs_url": "https://platform.claude.com/docs/en/docs/about-claude/pricing"
    },
    {
      "slug": "claude-haiku-4-5",
      "name": "Claude Haiku 4.5",
      "provider": "anthropic",
      "confidence": "official",
      "last_verified": "2026-06-12",
      "context_window": 200000,
      "max_output": 64000,
      "release_date": "2025-10-01",
      "pricing": {
        "standard": {
          "input_per_1m": 1,
          "output_per_1m": 5
        },
        "cache": {
          "type": "explicit",
          "read_per_1m": 0.1,
          "write_5m_per_1m": 1.25,
          "write_1h_per_1m": 2,
          "minimum_cache_tokens": 1024,
          "ttl_options": [
            "5m",
            "1h"
          ]
        },
        "batch": {
          "input_discount_percent": 50,
          "output_discount_percent": 50,
          "stacks_with_cache": "yes"
        },
        "long_context": {
          "has_separate_tier": false
        },
        "extras": {
          "reasoning_tokens_billed_as": "output",
          "web_search_per_1000": 10
        }
      },
      "official_docs_url": "https://platform.claude.com/docs/en/docs/about-claude/pricing"
    },
    {
      "slug": "gpt-5-5",
      "name": "GPT-5.5",
      "provider": "openai",
      "confidence": "official",
      "last_verified": "2026-06-12",
      "context_window": 1000000,
      "max_output": 128000,
      "release_date": "2026-04-23",
      "pricing": {
        "standard": {
          "input_per_1m": 5,
          "output_per_1m": 30
        },
        "cache": {
          "type": "automatic",
          "read_per_1m": 0.5
        },
        "batch": {
          "input_discount_percent": 50,
          "output_discount_percent": 50,
          "stacks_with_cache": "yes"
        },
        "long_context": {
          "has_separate_tier": false
        },
        "extras": {
          "reasoning_tokens_billed_as": "output",
          "web_search_per_1000": null
        }
      },
      "official_docs_url": "https://developers.openai.com/api/docs/pricing"
    },
    {
      "slug": "gpt-5-4",
      "name": "GPT-5.4",
      "provider": "openai",
      "confidence": "official",
      "last_verified": "2026-06-12",
      "context_window": 1000000,
      "max_output": 128000,
      "release_date": "2026-03-05",
      "pricing": {
        "standard": {
          "input_per_1m": 2.5,
          "output_per_1m": 15
        },
        "cache": {
          "type": "automatic",
          "read_per_1m": 0.25
        },
        "batch": {
          "input_discount_percent": 50,
          "output_discount_percent": 50,
          "stacks_with_cache": "yes"
        },
        "long_context": {
          "has_separate_tier": false
        },
        "extras": {
          "reasoning_tokens_billed_as": "output",
          "web_search_per_1000": null
        }
      },
      "official_docs_url": "https://developers.openai.com/api/docs/pricing"
    },
    {
      "slug": "gpt-5-4-mini",
      "name": "GPT-5.4 mini",
      "provider": "openai",
      "confidence": "official",
      "last_verified": "2026-06-12",
      "context_window": 400000,
      "max_output": 128000,
      "release_date": null,
      "pricing": {
        "standard": {
          "input_per_1m": 0.75,
          "output_per_1m": 4.5
        },
        "cache": {
          "type": "automatic",
          "read_per_1m": 0.075
        },
        "batch": {
          "input_discount_percent": 50,
          "output_discount_percent": 50,
          "stacks_with_cache": "yes"
        },
        "long_context": {
          "has_separate_tier": false
        },
        "extras": {
          "reasoning_tokens_billed_as": "output",
          "web_search_per_1000": null
        }
      },
      "official_docs_url": "https://developers.openai.com/api/docs/pricing"
    },
    {
      "slug": "gpt-5-4-nano",
      "name": "GPT-5.4 nano",
      "provider": "openai",
      "confidence": "official",
      "last_verified": "2026-06-12",
      "context_window": 400000,
      "max_output": 128000,
      "release_date": null,
      "pricing": {
        "standard": {
          "input_per_1m": 0.2,
          "output_per_1m": 1.25
        },
        "cache": {
          "type": "automatic",
          "read_per_1m": 0.02
        },
        "batch": {
          "input_discount_percent": 50,
          "output_discount_percent": 50,
          "stacks_with_cache": "yes"
        },
        "long_context": {
          "has_separate_tier": false
        },
        "extras": {
          "reasoning_tokens_billed_as": "output",
          "web_search_per_1000": null
        }
      },
      "official_docs_url": "https://developers.openai.com/api/docs/pricing"
    },
    {
      "slug": "gpt-4o",
      "name": "GPT-4o",
      "provider": "openai",
      "confidence": "delisted",
      "last_verified": "2026-05-06",
      "context_window": 128000,
      "max_output": 16384,
      "release_date": "2024-05-13",
      "pricing": {
        "standard": {
          "input_per_1m": 2.5,
          "output_per_1m": 10
        },
        "cache": {
          "type": "automatic",
          "read_per_1m": 1.25
        },
        "batch": {
          "input_discount_percent": 50,
          "output_discount_percent": 50,
          "stacks_with_cache": "yes"
        },
        "long_context": {
          "has_separate_tier": false
        },
        "extras": {
          "reasoning_tokens_billed_as": null,
          "web_search_per_1000": 25,
          "note": "Removed from OpenAI's public pricing page (observed 2026-06-12). Prices shown are the last rates we verified on 2026-05-06 and are kept for reference only."
        }
      },
      "official_docs_url": "https://platform.openai.com/docs/pricing"
    },
    {
      "slug": "gpt-4o-mini",
      "name": "GPT-4o mini",
      "provider": "openai",
      "confidence": "delisted",
      "last_verified": "2026-05-06",
      "context_window": 128000,
      "max_output": 16384,
      "release_date": "2024-07-18",
      "pricing": {
        "standard": {
          "input_per_1m": 0.15,
          "output_per_1m": 0.6
        },
        "cache": {
          "type": "automatic",
          "read_per_1m": 0.075
        },
        "batch": {
          "input_discount_percent": 50,
          "output_discount_percent": 50,
          "stacks_with_cache": "yes"
        },
        "long_context": {
          "has_separate_tier": false
        },
        "extras": {
          "reasoning_tokens_billed_as": null,
          "web_search_per_1000": null,
          "note": "Removed from OpenAI's public pricing page (observed 2026-06-12). Prices shown are the last rates we verified on 2026-05-06 and are kept for reference only."
        }
      },
      "official_docs_url": "https://platform.openai.com/docs/pricing"
    },
    {
      "slug": "o3",
      "name": "OpenAI o3",
      "provider": "openai",
      "confidence": "delisted",
      "last_verified": "2026-05-06",
      "context_window": 200000,
      "max_output": 100000,
      "release_date": "2025-04-16",
      "pricing": {
        "standard": {
          "input_per_1m": 2,
          "output_per_1m": 8
        },
        "cache": {
          "type": "automatic",
          "read_per_1m": 0.5
        },
        "batch": {
          "input_discount_percent": 50,
          "output_discount_percent": 50,
          "stacks_with_cache": "yes"
        },
        "long_context": {
          "has_separate_tier": false
        },
        "extras": {
          "reasoning_tokens_billed_as": "output",
          "web_search_per_1000": null,
          "note": "Removed from OpenAI's public pricing page (observed 2026-06-12). Prices shown are the last rates we verified on 2026-05-06 and are kept for reference only. Reflects OpenAI's mid-2025 o3 reprice. Earlier launch pricing was $10/$40 input/output."
        }
      },
      "official_docs_url": "https://platform.openai.com/docs/pricing"
    },
    {
      "slug": "o3-mini",
      "name": "OpenAI o3-mini",
      "provider": "openai",
      "confidence": "delisted",
      "last_verified": "2026-05-06",
      "context_window": 200000,
      "max_output": 100000,
      "release_date": "2025-01-31",
      "pricing": {
        "standard": {
          "input_per_1m": 1.1,
          "output_per_1m": 4.4
        },
        "cache": {
          "type": "automatic",
          "read_per_1m": 0.55
        },
        "batch": {
          "input_discount_percent": 50,
          "output_discount_percent": 50,
          "stacks_with_cache": "yes"
        },
        "long_context": {
          "has_separate_tier": false
        },
        "extras": {
          "reasoning_tokens_billed_as": "output",
          "web_search_per_1000": null,
          "note": "Removed from OpenAI's public pricing page (observed 2026-06-12). Prices shown are the last rates we verified on 2026-05-06 and are kept for reference only."
        }
      },
      "official_docs_url": "https://platform.openai.com/docs/pricing"
    },
    {
      "slug": "gemini-3-1-pro",
      "name": "Gemini 3.1 Pro (Preview)",
      "provider": "google",
      "confidence": "official",
      "last_verified": "2026-06-12",
      "context_window": 1048576,
      "max_output": 65536,
      "release_date": "2026-02-19",
      "pricing": {
        "standard": {
          "input_per_1m": 2,
          "output_per_1m": 12
        },
        "cache": {
          "type": "explicit",
          "read_per_1m": 0.2,
          "storage_per_1m_per_hour": 4.5
        },
        "batch": {
          "input_discount_percent": 50,
          "output_discount_percent": 50,
          "stacks_with_cache": "yes"
        },
        "long_context": {
          "has_separate_tier": true,
          "threshold_tokens": 200000,
          "input_per_1m": 4,
          "output_per_1m": 18,
          "cache_read_per_1m": 0.4
        },
        "extras": {
          "reasoning_tokens_billed_as": "output",
          "web_search_per_1000": null,
          "note": "Preview model. Prices are the standard tier; batch/flex run at 50% of standard."
        }
      },
      "official_docs_url": "https://ai.google.dev/gemini-api/docs/pricing"
    },
    {
      "slug": "gemini-3-5-flash",
      "name": "Gemini 3.5 Flash",
      "provider": "google",
      "confidence": "official",
      "last_verified": "2026-06-12",
      "context_window": 1000000,
      "max_output": 65536,
      "release_date": "2026-05-19",
      "pricing": {
        "standard": {
          "input_per_1m": 1.5,
          "output_per_1m": 9
        },
        "cache": {
          "type": "explicit",
          "read_per_1m": 0.15,
          "storage_per_1m_per_hour": 1
        },
        "batch": {
          "input_discount_percent": 50,
          "output_discount_percent": 50,
          "stacks_with_cache": "yes"
        },
        "long_context": {
          "has_separate_tier": false
        },
        "extras": {
          "reasoning_tokens_billed_as": "output",
          "web_search_per_1000": null,
          "note": null
        }
      },
      "official_docs_url": "https://ai.google.dev/gemini-api/docs/pricing"
    },
    {
      "slug": "gemini-3-1-flash-lite",
      "name": "Gemini 3.1 Flash-Lite",
      "provider": "google",
      "confidence": "official",
      "last_verified": "2026-06-12",
      "context_window": 1000000,
      "max_output": 65536,
      "release_date": null,
      "pricing": {
        "standard": {
          "input_per_1m": 0.25,
          "output_per_1m": 1.5
        },
        "cache": {
          "type": "explicit",
          "read_per_1m": 0.025,
          "storage_per_1m_per_hour": 1
        },
        "batch": {
          "input_discount_percent": 50,
          "output_discount_percent": 50,
          "stacks_with_cache": "yes"
        },
        "long_context": {
          "has_separate_tier": false
        },
        "extras": {
          "reasoning_tokens_billed_as": "output",
          "web_search_per_1000": null,
          "note": "Text/image/video pricing shown. Audio input is $0.50/1M (cache read $0.05)."
        }
      },
      "official_docs_url": "https://ai.google.dev/gemini-api/docs/pricing"
    },
    {
      "slug": "gemini-2-5-pro",
      "name": "Gemini 2.5 Pro",
      "provider": "google",
      "confidence": "official",
      "last_verified": "2026-06-12",
      "context_window": 2000000,
      "max_output": 65536,
      "release_date": "2025-03-25",
      "pricing": {
        "standard": {
          "input_per_1m": 1.25,
          "output_per_1m": 10
        },
        "cache": {
          "type": "explicit",
          "read_per_1m": 0.125,
          "storage_per_1m_per_hour": 4.5
        },
        "batch": {
          "input_discount_percent": 50,
          "output_discount_percent": 50,
          "stacks_with_cache": "yes"
        },
        "long_context": {
          "has_separate_tier": true,
          "threshold_tokens": 200000,
          "input_per_1m": 2.5,
          "output_per_1m": 15,
          "cache_read_per_1m": 0.25
        },
        "extras": {
          "reasoning_tokens_billed_as": "output",
          "web_search_per_1000": 35
        }
      },
      "official_docs_url": "https://ai.google.dev/gemini-api/docs/pricing"
    },
    {
      "slug": "gemini-2-5-flash",
      "name": "Gemini 2.5 Flash",
      "provider": "google",
      "confidence": "official",
      "last_verified": "2026-06-12",
      "context_window": 1000000,
      "max_output": 65536,
      "release_date": "2025-03-25",
      "pricing": {
        "standard": {
          "input_per_1m": 0.3,
          "output_per_1m": 2.5
        },
        "cache": {
          "type": "explicit",
          "read_per_1m": 0.03,
          "storage_per_1m_per_hour": 1
        },
        "batch": {
          "input_discount_percent": 50,
          "output_discount_percent": 50,
          "stacks_with_cache": "yes"
        },
        "long_context": {
          "has_separate_tier": false
        },
        "extras": {
          "reasoning_tokens_billed_as": "output",
          "web_search_per_1000": null,
          "note": "Text/image/video pricing shown. Audio input is $1.00/1M (cache read $0.10), audio batch $0.50/1M."
        }
      },
      "official_docs_url": "https://ai.google.dev/gemini-api/docs/pricing"
    },
    {
      "slug": "llama-4-scout",
      "name": "Llama 4 Scout (Groq)",
      "provider": "groq",
      "confidence": "official",
      "last_verified": "2026-06-12",
      "context_window": 128000,
      "max_output": 8192,
      "release_date": "2025-04-05",
      "pricing": {
        "standard": {
          "input_per_1m": 0.11,
          "output_per_1m": 0.34
        },
        "cache": null,
        "batch": null,
        "long_context": {
          "has_separate_tier": false
        },
        "extras": {
          "reasoning_tokens_billed_as": null,
          "web_search_per_1000": null,
          "note": "Hosted by Groq, which caps context at 128k (Meta's weights support more). Mixture-of-experts: 17B active parameters, 16 experts. Other hosts price the same model differently."
        }
      },
      "official_docs_url": "https://console.groq.com/docs/model/meta-llama/llama-4-scout-17b-16e-instruct"
    },
    {
      "slug": "llama-3-3-70b",
      "name": "Llama 3.3 70B Versatile (Groq)",
      "provider": "groq",
      "confidence": "official",
      "last_verified": "2026-06-12",
      "context_window": 128000,
      "max_output": 4096,
      "release_date": "2024-12-06",
      "pricing": {
        "standard": {
          "input_per_1m": 0.59,
          "output_per_1m": 0.79
        },
        "cache": null,
        "batch": null,
        "long_context": {
          "has_separate_tier": false
        },
        "extras": {
          "reasoning_tokens_billed_as": null,
          "web_search_per_1000": null,
          "note": "Hosted by Groq. Meta provides the model weights (Llama 3.3 70B), Groq sets the hosted API price. Other hosts (Together, Fireworks, etc.) price the same model differently; check your specific host for actuals."
        }
      },
      "official_docs_url": "https://console.groq.com/docs/model/llama-3.3-70b-versatile"
    }
  ],
  "claude_code_plans": {
    "last_verified": "2026-06-12",
    "source": "https://claude.com/pricing",
    "plans": [
      {
        "slug": "pro",
        "name": "Claude Pro",
        "monthly_usd": 20,
        "best_for": "Casual builders, weekend projects, light daily Claude Code use",
        "watch_out": "May hit usage limits during long agentic sessions"
      },
      {
        "slug": "max-5x",
        "name": "Claude Max 5x",
        "monthly_usd": 100,
        "best_for": "Daily developers who use Claude Code heavily",
        "watch_out": "Fixed cost only makes sense if you use it enough"
      },
      {
        "slug": "max-20x",
        "name": "Claude Max 20x",
        "monthly_usd": 200,
        "best_for": "Very heavy users, long sessions, frequent limit frustration",
        "watch_out": "Expensive if usage is inconsistent"
      },
      {
        "slug": "api",
        "name": "Anthropic API",
        "monthly_usd": null,
        "best_for": "Automation, CI, background agents, predictable usage-based workflows",
        "watch_out": "Can get expensive if loops run uncontrolled"
      }
    ]
  }
}