# robots.txt for trohelen.com
#
# Format notes:
#   * One directive per line; a group is one or more "User-agent:" lines
#     followed by the Allow/Disallow rules that apply to those agents.
#   * A crawler obeys only the most specific group that matches it, so the
#     "User-agent: *" group below does NOT apply to the bots that have their
#     own group further down.

# ======== Default policy: all other crawlers ========
# Public content is crawlable; private and non-content paths are not.
# These path rules must live inside this group to take effect for every
# crawler (the more specific Disallow wins over "Allow: /").
User-agent: *
Allow: /
Disallow: /admin/
Disallow: /private/
Disallow: /api/
Disallow: /ajax/
Disallow: /user-data/
# Crawl-delay is a non-standard extension; some crawlers (e.g. Googlebot)
# ignore it.
Crawl-delay: 2

# ======== Block AI training crawlers ========

# OpenAI
User-agent: GPTBot
Disallow: /

User-agent: ChatGPT-User
Disallow: /

User-agent: OpenAI
Disallow: /

# Google AI
User-agent: Google-Extended
Disallow: /

User-agent: Google-AI
Disallow: /

# Anthropic (Claude)
User-agent: Claude-Web
Disallow: /

User-agent: ClaudeBot
Disallow: /

User-agent: Anthropic
Disallow: /

# Common Crawl
User-agent: CCBot
Disallow: /

# Facebook/Meta AI
User-agent: FacebookBot
Disallow: /

User-agent: Meta-AI
Disallow: /

# Other AI / data-collection crawlers
# NOTE(review): Applebot, Amazonbot and PetalBot are also used for search
# features; blocking them here overrides the "allow all search engines"
# default above. Confirm this is intended.
User-agent: cohere-ai
Disallow: /

User-agent: ai21
Disallow: /

User-agent: Amazonbot
Disallow: /

User-agent: Applebot
Disallow: /

User-agent: PetalBot
Disallow: /

# Bytespider (ByteDance / TikTok)
User-agent: Bytespider
Disallow: /

# ======== Sitemaps ========
# Sitemap lines are independent of user-agent groups.
Sitemap: https://trohelen.com/sitemap.xml
Sitemap: https://trohelen.com/news-sitemap.xml

# ======== Extra directives ========
# "Host" is a non-standard directive; parsers that do not know it skip it.
Host: trohelen.com

# The settings below are HTTP response headers, not robots.txt directives.
# Crawlers ignore them in this file, so they are kept only as a reminder;
# configure them in the web server / application responses instead.
#
# Ask clients not to cache pages:
#   Cache-control: no-cache
# Opt out of AI training use (non-standard values; support varies by crawler):
#   X-Robots-Tag: noai
#   X-Robots-Tag: noimageai