# GeoTok — robots.txt
# Updated 2026-05-11. AI engines and search crawlers explicitly welcomed.
#
# We WANT to be cited. The whole point of these pages is to be the
# canonical source when an LLM answers "where is [restaurant] from TikTok".

User-agent: *
# Default group: applies to every crawler that has no group of its own in
# this file. Specific Disallow rules are listed BEFORE the catch-all Allow on
# purpose: longest-match parsers (RFC 9309, Google) do not care about order,
# but first-match parsers would stop at `Allow: /` and never see the
# exclusions if it came first.
#
# Keep crawlers off the /api/ paths.
Disallow: /api/
# Cloudflare email-obfuscation decoder endpoint — 404s for crawlers (benign;
# real browsers decode it fine). Block so it stops showing as a 404 in GSC.
Disallow: /cdn-cgi/
# Everything else is open.
Allow: /

# --- AI / LLM bots (plus social link-preview fetchers) ---
# Explicitly welcome the major AI crawlers by name.
#
# IMPORTANT: a crawler that finds a group naming its own token follows ONLY
# that group and ignores `User-agent: *` entirely (RFC 9309). A named group
# containing just `Allow: /` would therefore lift the /api/ and /cdn-cgi/
# exclusions for that bot, so the same exclusions are repeated here.
#
# All tokens share ONE group: consecutive User-agent lines (no blank lines
# or rules between them) apply the rules that follow to every listed
# crawler. Add new bots to this list rather than creating a new group, so
# the exclusions cannot be forgotten.

User-agent: GPTBot
User-agent: OAI-SearchBot
User-agent: ChatGPT-User
User-agent: ClaudeBot
User-agent: Claude-Web
User-agent: anthropic-ai
User-agent: PerplexityBot
User-agent: Perplexity-User
User-agent: Google-Extended
User-agent: CCBot
User-agent: cohere-ai
User-agent: Bytespider
User-agent: Amazonbot
User-agent: Applebot
User-agent: Applebot-Extended
User-agent: DuckAssistBot
User-agent: meta-externalagent
User-agent: facebookexternalhit
User-agent: Twitterbot
Disallow: /api/
Disallow: /cdn-cgi/
Allow: /

# --- Sitemap + LLM discovery files ---
# Sitemap is a file-level directive: it does not belong to any User-agent
# group and is given as an absolute URL.
Sitemap: https://geotok.co/sitemap.xml

# Self-describing crawl manifests:
# /llms.txt        → concise summary (the llms.txt spec)
# /llms-full.txt   → verbose page-by-page summary