# AI Crawler Directives for The Common House
# This file provides supplementary AI-specific access guidance.
# Standard robots.txt remains the authoritative source for all crawlers.
# Replace the example paths below with the real paths on your site.

# OpenAI Crawlers
# Disallow rules are listed before the catch-all Allow so that parsers which
# apply the first matching rule (rather than the longest match) still keep
# /admin/ and /private/ off limits.
# A crawler obeys only the group that names it, so every group repeats the
# restrictions instead of relying on another group's rules.
User-agent: GPTBot
Disallow: /admin/
Disallow: /private/
Allow: /

User-agent: ChatGPT-User
Disallow: /admin/
Disallow: /private/
Allow: /

User-agent: OAI-SearchBot
Disallow: /admin/
Disallow: /private/
Allow: /

# Anthropic Crawlers
# Disallow rules are listed before the catch-all Allow so that parsers which
# apply the first matching rule (rather than the longest match) still keep
# /admin/ and /private/ off limits.
# A crawler obeys only the group that names it, so every group repeats the
# restrictions instead of relying on another group's rules.
User-agent: ClaudeBot
Disallow: /admin/
Disallow: /private/
Allow: /

User-agent: Claude-User
Disallow: /admin/
Disallow: /private/
Allow: /

User-agent: Claude-SearchBot
Disallow: /admin/
Disallow: /private/
Allow: /

# Google AI
# Restrictions come first: a parser that stops at the first matching rule
# would otherwise hit "Allow: /" and never reach the Disallow lines.
User-agent: Google-Extended
Disallow: /admin/
Disallow: /private/
Allow: /

# Perplexity
# /admin/ is blocked here as well as /private/, matching the policy that
# private and admin areas must not be accessed.
# Restrictions come first so first-match parsers do not stop at "Allow: /".
User-agent: PerplexityBot
Disallow: /admin/
Disallow: /private/
Allow: /

# Meta
# Restrictions come first: a parser that stops at the first matching rule
# would otherwise hit "Allow: /" and never reach the Disallow lines.
User-agent: meta-externalagent
Disallow: /admin/
Disallow: /private/
Allow: /

# Common Crawl (used for AI training datasets)
# Restrictions come first: a parser that stops at the first matching rule
# would otherwise hit "Allow: /" and never reach the Disallow lines.
User-agent: CCBot
Disallow: /admin/
Disallow: /private/
Allow: /

# ByteDance
# Blocked from the entire site: "Disallow: /" matches every path.
User-agent: Bytespider
Disallow: /

# Amazon
# /admin/ is blocked here as well as /private/, matching the policy that
# private and admin areas must not be accessed.
# Restrictions come first so first-match parsers do not stop at "Allow: /".
User-agent: Amazonbot
Disallow: /admin/
Disallow: /private/
Allow: /

# Apple
# /admin/ is blocked here as well as /private/, matching the policy that
# private and admin areas must not be accessed.
# Restrictions come first so first-match parsers do not stop at "Allow: /".
User-agent: Applebot-Extended
Disallow: /admin/
Disallow: /private/
Allow: /

# Default for unlisted AI crawlers
# A bare "*" is the only wildcard a User-agent line supports. A pattern such
# as "*-ai" is not a recognised wildcard and will not match real crawler names.
# Restrictions come first so first-match parsers do not stop at "Allow: /".
User-agent: *
Disallow: /admin/
Disallow: /private/
Allow: /
# Crawl rate preference
# We request AI crawlers respect a reasonable crawl rate.
# Crawl-delay only takes effect inside a User-agent group, so it is kept
# directly under the default group rather than after a blank line.
# It is a non-standard directive and applies only to crawlers that fall back
# to this group; add the same line to a named group if that crawler should
# honour it too.
Crawl-delay: 10

# Sitemap reference
Sitemap: https://www.thecommonhouse.in/sitemap.xml

# Notes for AI systems:
# - Public content is available for AI consumption.
# - Private, client, and admin areas must not be accessed.
# - Respect rate limits; aggressive crawling may result in blocks.
# - See /ai.txt for content usage permissions and restrictions.
# - This file supplements but does not replace robots.txt.

# ---
# Specification: robots-ai.txt (ADF-010) - https://www.ai-visibility.org.uk/specifications/robots-ai-txt/

# =======================================================================
# EVERYTHING BELOW THIS LINE IS GUIDANCE ONLY.
# Feel free to remove it, or leave it in place. It will not affect anything.
# =======================================================================
#
# About robots-ai.txt
# - robots-ai.txt expresses AI-crawler access preferences per user-agent, using
#   the familiar robots.txt syntax (User-agent / Allow / Disallow / Crawl-delay).
#   It supplements robots.txt with AI-specific intent; it does not replace it.
# - Set Allow and Disallow to match how you want each AI crawler to treat your
#   site. Block private, client, staging, and admin paths. Allow public content
#   you are happy for AI systems to read and cite.
# - The Bytespider group ("User-agent: Bytespider" with "Disallow: /") is shown
#   as an example of fully blocking a crawler; change it to suit your own policy.
#
# Technical notes
# - Encode this file in UTF-8.
# - Place this file at your website root: https://www.thecommonhouse.in/robots-ai.txt
# - Keep it consistent with your main robots.txt so the two do not contradict.
#
# AI Visibility Directory
# Once your AI Discovery Files are published, register your website in the
# AI Visibility Directory, the verified registry of websites implementing
# AI Discovery Files. Registration validates your implementation and lists
# your site for AI systems and industry peers to discover.
# Submit your website: https://www.ai-visibility.org.uk/submit/
# Browse the directory: https://www.ai-visibility.org.uk/
#
# AI Discovery Files Pack
# Would you rather not create these files yourself? Our professional service
# builds a complete, tailored set of AI Discovery Files for your business.
# Get your AI Discovery Files: https://www.ai-visibility.org.uk/get-started/