# Default group: any crawler that has no more specific group in this file may
# fetch the entire site and is asked to pause 2 seconds between requests.
# Crawl-delay is not part of RFC 9309; crawlers that do not implement it
# ignore the line.
User-agent: *
Crawl-delay: 2
Allow: /

# ======== Block AI-training crawlers ========
# OpenAI: every token shares one group and one site-wide Disallow.
# NOTE(review): confirm against OpenAI's crawler documentation that a crawler
# actually sends the bare "OpenAI" token.
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: OpenAI
Disallow: /

# Google AI: both tokens share one group that denies the whole site.
# NOTE(review): confirm that "Google-AI" is a token Google documents.
User-agent: Google-Extended
User-agent: Google-AI
Disallow: /

# Anthropic (Claude): all three tokens share one group that denies the whole
# site.
# NOTE(review): confirm that the bare "Anthropic" token matches a real crawler.
User-agent: Claude-Web
User-agent: ClaudeBot
User-agent: Anthropic
Disallow: /

# Common Crawl
# The crawler matching the token CCBot is denied every path on the site.
User-agent: CCBot
Disallow: /

# Facebook/Meta AI: every token below is denied the whole site.
# meta-externalagent is the token Meta documents for its AI-training crawler;
# without it that crawler would fall through to the permissive "*" group.
# NOTE(review): "Meta-AI" is kept from the original list; confirm that a
# crawler actually sends this token.
User-agent: FacebookBot
User-agent: meta-externalagent
User-agent: Meta-AI
Disallow: /

# Other AI / data-collection crawlers: all denied the whole site.
# Bytespider is TikTok's crawler. That note lives here rather than at the end
# of its User-agent line, because parsers that do not strip trailing comments
# would read the token as "Bytespider # TikTok" and never match it.
# NOTE(review): Applebot and PetalBot also serve search features; confirm a
# full block is intended (Apple documents a separate Applebot-Extended token
# for opting out of AI training only).
User-agent: cohere-ai
User-agent: ai21
User-agent: Amazonbot
User-agent: Applebot
User-agent: PetalBot
User-agent: Bytespider
Disallow: /

# ======== Directory restrictions ========
# Optional: keep crawlers out of specific directories.
# Rules only take effect inside a group, so these paths get their own
# "User-agent: *" line. Without it they attach to the preceding group
# (Bytespider, which is already fully disallowed) and never reach any other
# crawler. RFC 9309 parsers merge all "User-agent: *" groups in a file, and the
# longest matching path wins, so these override a site-wide "Allow: /".
User-agent: *
Disallow: /admin/
Disallow: /private/
Disallow: /api/
Disallow: /ajax/
Disallow: /user-data/

# Sitemap locations
Sitemap: https://trohelen.com/sitemap.xml
Sitemap: https://trohelen.com/news-sitemap.xml

# Additional directives
# NOTE(review): Host is a non-standard directive (historically Yandex-only);
# confirm it is still needed.
Host: trohelen.com
# Cache-Control and X-Robots-Tag are HTTP response headers, not robots.txt
# records, so crawlers ignore them when they appear in this file. To ask
# crawlers not to cache pages and to limit AI-training use, have the web
# server send them with each response instead, e.g.:
#   Cache-Control: no-cache
#   X-Robots-Tag: noai, noimageai
# NOTE(review): noai / noimageai are non-standard values; confirm which
# crawlers honour them.