# robots.txt for cryptool.io
#
# Default policy: everything is crawlable.
# Explicit Allow blocks below document the AI crawlers we welcome
# (Anthropic, OpenAI, Perplexity, Google AI, Apple Intelligence, Meta,
# Common Crawl, etc.) so the intent is unambiguous to anyone reading
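# the file. Removing a User-agent block does NOT block that crawler; it
# simply falls back to the `User-agent: *` group. To opt a crawler out,
# REPLACE its `Allow: /` with `Disallow: /`. Merely adding `Disallow: /`
# next to the existing `Allow: /` is not enough: when both rules match
# with equal length, compliant parsers prefer Allow.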

# Fallback group: applies to any crawler that has no named group below.
User-agent: *
Allow: /

# Each named group below repeats the same allow-all rule as the fallback.
# A crawler obeys only the group that matches its own user-agent token,
# so editing one group does not affect any other crawler.

# --- Anthropic (Claude, Claude Search) ---
User-agent: ClaudeBot
Allow: /
User-agent: Claude-Web
Allow: /
User-agent: anthropic-ai
Allow: /

# --- OpenAI (ChatGPT search, training, on-demand fetch) ---
User-agent: GPTBot
Allow: /
User-agent: OAI-SearchBot
Allow: /
User-agent: ChatGPT-User
Allow: /

# --- Perplexity ---
User-agent: PerplexityBot
Allow: /
User-agent: Perplexity-User
Allow: /

# --- Google (search + AI Overviews / Gemini training opt-in) ---
User-agent: Googlebot
Allow: /
User-agent: Google-Extended
Allow: /
User-agent: GoogleOther
Allow: /

# --- Microsoft / Bing (powers ChatGPT search and Copilot) ---
User-agent: bingbot
Allow: /
User-agent: msnbot
Allow: /

# --- Apple Intelligence ---
User-agent: Applebot
Allow: /
User-agent: Applebot-Extended
Allow: /

# --- Meta AI (Llama training + on-demand fetch) ---
User-agent: Meta-ExternalAgent
Allow: /
User-agent: Meta-ExternalFetcher
Allow: /
User-agent: FacebookBot
Allow: /

# --- DuckDuckGo (DuckAssist) ---
User-agent: DuckAssistBot
Allow: /
User-agent: DuckDuckBot
Allow: /

# --- Common Crawl (training corpus for many LLMs) ---
User-agent: CCBot
Allow: /

# --- Yandex, Baidu, ByteDance, Amazon ---
User-agent: YandexBot
Allow: /
User-agent: Baiduspider
Allow: /
User-agent: Bytespider
Allow: /
User-agent: Amazonbot
Allow: /

# Sitemap location. This line is not part of any User-agent group and
# applies to every crawler that reads the file.
Sitemap: https://cryptool.io/sitemap.xml

# AI ingestion hints (llmstxt.org convention)
# - llms.txt: compact site index (URLs only)
# - llms-full.txt: full prose content for one-shot ingestion
# Some AI crawlers prefer these over crawling the SPA route-by-route.