diff --git a/static/robots.txt b/static/robots.txt index da0eee6..54b2c34 100644 --- a/static/robots.txt +++ b/static/robots.txt @@ -1,7 +1,52 @@ +# Disallow crawling of specific directories and file types User-agent: * Disallow: /*.asc.txt$ Disallow: /*.pdf$ Disallow: /documents/ Disallow: /img/ +# Disallow the crawlers listed below site-wide (see https://github.com/ai-robots-txt/ai.robots.txt/) +User-agent: AI2Bot +User-agent: Ai2Bot-Dolma +User-agent: Amazonbot +User-agent: anthropic-ai +User-agent: Applebot +User-agent: Applebot-Extended +User-agent: Bytespider +User-agent: CCBot +User-agent: ChatGPT-User +User-agent: Claude-Web +User-agent: ClaudeBot +User-agent: cohere-ai +User-agent: Diffbot +User-agent: DuckAssistBot +User-agent: FacebookBot +User-agent: facebookexternalhit +User-agent: FriendlyCrawler +User-agent: Google-Extended +User-agent: GoogleOther +User-agent: GoogleOther-Image +User-agent: GoogleOther-Video +User-agent: GPTBot +User-agent: iaskspider/2.0 +User-agent: ICC-Crawler +User-agent: ImagesiftBot +User-agent: img2dataset +User-agent: ISSCyberRiskCrawler +User-agent: Kangaroo Bot +User-agent: Meta-ExternalAgent +User-agent: Meta-ExternalFetcher +User-agent: OAI-SearchBot +User-agent: omgili +User-agent: omgilibot +User-agent: PerplexityBot +User-agent: PetalBot +User-agent: Scrapy +User-agent: Sidetrade indexer bot +User-agent: Timpibot +User-agent: VelenPublicWebCrawler +User-agent: Webzio-Extended +User-agent: YouBot +Disallow: / + Sitemap: https://ethanyoo.com/sitemap.xml