|
- # Based on http://meta.wikimedia.org/robots.txt
- # advertising-related bots:
- User-agent: Mediapartners-Google*
- Disallow: /
- User-agent: Adsbot-Google*
- Disallow: /
- # Crawlers that are kind enough to obey robots.txt, but which we'd rather
- # not have unless they're feeding search engines.
- User-agent: UbiCrawler
- Disallow: /
- User-agent: DOC
- Disallow: /
- User-agent: Zao
- Disallow: /
- # Some bots are known to be trouble, particularly those designed to copy
- # entire sites. Please obey robots.txt.
- User-agent: sitecheck.internetseer.com
- Disallow: /
- User-agent: Zealbot
- Disallow: /
- User-agent: MSIECrawler
- Disallow: /
- User-agent: SiteSnagger
- Disallow: /
- User-agent: WebStripper
- Disallow: /
- User-agent: WebCopier
- Disallow: /
- User-agent: Fetch
- Disallow: /
- User-agent: Offline Explorer
- Disallow: /
- User-agent: Teleport
- Disallow: /
- User-agent: TeleportPro
- Disallow: /
- User-agent: WebZIP
- Disallow: /
- User-agent: linko
- Disallow: /
- User-agent: HTTrack
- Disallow: /
- User-agent: Microsoft.URL.Control
- Disallow: /
- User-agent: Xenu
- Disallow: /
- User-agent: larbin
- Disallow: /
- User-agent: libwww
- Disallow: /
- User-agent: ZyBORG
- Disallow: /
- User-agent: Download Ninja
- Disallow: /
- # wget in its recursive mode is a frequent problem.
- # Please read the man page and use it properly; there is a
- # --wait option you can use to set the delay between hits,
- # for instance.
- User-agent: wget
- Disallow: /
- # The 'grub' distributed client has been *very* poorly behaved.
- User-agent: grub-client
- Disallow: /
- # Doesn't follow robots.txt anyway, but...
- User-agent: k2spider
- Disallow: /
- # Hits many times per second, which is not acceptable.
- # http://www.nameprotect.com/botinfo.html
- User-agent: NPBot
- Disallow: /
- # A capture bot that downloads gazillions of pages with no public benefit.
- # http://www.webreaper.net/
- User-agent: WebReaper
- Disallow: /
- User-agent: *
- Disallow: /viewvc/viewvc.cgi
- Disallow: /svn/
- Disallow: /wiki/skins/
- Disallow: /wiki/Special:Random
- Disallow: /wiki/Special%3ARandom
- Disallow: /wiki/Special:Search
- Disallow: /wiki/Special%3ASearch
- Disallow: /wiki/Special:Allmessages
- Disallow: /wiki/Special%3AAllmessages
- Disallow: /wiki/Special:Log
- Disallow: /wiki/Special%3ALog
- Disallow: /wiki/Special:Listusers
- Disallow: /wiki/Special%3AListusers
- Disallow: /wiki/*printable=yes*
- Disallow: /bugs/file_download.php?*
- Disallow: /bugs/bug_view_advanced_page.php?*
- Disallow: /bugs/print_bug_page.php?*
- Disallow: /bugs/api/
- Crawl-delay: 1
|