robots.txt 2.4 KB

  1. # Based on http://meta.wikimedia.org/robots.txt
  2. # advertising-related bots:
  3. User-agent: Mediapartners-Google*
  4. Disallow: /
  5. User-agent: Adsbot-Google*
  6. Disallow: /
  7. # Crawlers that are kind enough to obey, but which we'd rather not have
  8. # unless they're feeding search engines.
  9. User-agent: UbiCrawler
  10. Disallow: /
  11. User-agent: DOC
  12. Disallow: /
  13. User-agent: Zao
  14. Disallow: /
  15. # Some bots are known to be trouble, particularly those designed to copy
  16. # entire sites. Please obey robots.txt.
  17. User-agent: sitecheck.internetseer.com
  18. Disallow: /
  19. User-agent: Zealbot
  20. Disallow: /
  21. User-agent: MSIECrawler
  22. Disallow: /
  23. User-agent: SiteSnagger
  24. Disallow: /
  25. User-agent: WebStripper
  26. Disallow: /
  27. User-agent: WebCopier
  28. Disallow: /
  29. User-agent: Fetch
  30. Disallow: /
  31. User-agent: Offline Explorer
  32. Disallow: /
  33. User-agent: Teleport
  34. Disallow: /
  35. User-agent: TeleportPro
  36. Disallow: /
  37. User-agent: WebZIP
  38. Disallow: /
  39. User-agent: linko
  40. Disallow: /
  41. User-agent: HTTrack
  42. Disallow: /
  43. User-agent: Microsoft.URL.Control
  44. Disallow: /
  45. User-agent: Xenu
  46. Disallow: /
  47. User-agent: larbin
  48. Disallow: /
  49. User-agent: libwww
  50. Disallow: /
  51. User-agent: ZyBORG
  52. Disallow: /
  53. User-agent: Download Ninja
  54. Disallow: /
  55. # wget in its recursive mode is a frequent problem.
  56. # Please read the man page and use it properly; there is a
  57. # --wait option you can use to set the delay between hits,
  58. # for instance.
  59. User-agent: wget
  60. Disallow: /
  61. # The 'grub' distributed client has been *very* poorly behaved.
  62. User-agent: grub-client
  63. Disallow: /
  64. # This bot doesn't follow robots.txt anyway, but we list it regardless.
  65. User-agent: k2spider
  66. Disallow: /
  67. # Hits the site many times per second, which is not acceptable.
  68. # http://www.nameprotect.com/botinfo.html
  69. User-agent: NPBot
  70. Disallow: /
  71. # A capture bot that downloads gazillions of pages with no public benefit.
  72. # http://www.webreaper.net/
  73. User-agent: WebReaper
  74. Disallow: /
  75. User-agent: *
  76. Disallow: /viewvc/viewvc.cgi
  77. Disallow: /svn/
  78. Disallow: /wiki/skins/
  79. Disallow: /wiki/Special:Random
  80. Disallow: /wiki/Special%3ARandom
  81. Disallow: /wiki/Special:Search
  82. Disallow: /wiki/Special%3ASearch
  83. Disallow: /wiki/Special:Allmessages
  84. Disallow: /wiki/Special%3AAllmessages
  85. Disallow: /wiki/Special:Log
  86. Disallow: /wiki/Special%3ALog
  87. Disallow: /wiki/Special:Listusers
  88. Disallow: /wiki/Special%3AListusers
  89. Disallow: /wiki/*printable=yes*
  90. Disallow: /bugs/file_download.php?*
  91. Disallow: /bugs/bug_view_advanced_page.php?*
  92. Disallow: /bugs/print_bug_page.php?*
  93. Disallow: /bugs/api/
  94. Crawl-delay: 1