70_sare_html2.cf 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339
  1. # SARE HTML Ruleset for SpamAssassin - ruleset 2
  2. # Version: 01.03.10
  3. # Created: 2004-03-31
  4. # Modified: 2006-06-03
  5. # Usage instructions, documentation, and change history in 70_sare_html0.cf
  6. #@@# Revision History: Full Revision History stored in 70_sare_html.log
  7. #@@# 01.03.09: May ?? 2006
  8. #@@# Minor score tweaks based on recent mass-checks
  9. #@@# Moved file 0 to file 2: SARE_HTML_EHTML_OBFU
  10. #@@# Moved file 0 to file 2: SARE_HTML_HEAD_AFFIL
  11. #@@# Moved file 0 to file 2: SARE_HTML_LEAKTHRU1
  12. #@@# Moved file 0 to file 2: SARE_HTML_LEAKTHRU2
  13. #@@# Moved file 0 to file 2: SARE_HTML_ONE_LINE3
  14. #@@# Moved file 0 to file 2: SARE_HTML_POB1200
  15. #@@# Moved file 0 to file 2: SARE_HTML_URI_HIDADD
  16. #@@# Moved file 0 to file 2: SARE_HTML_URI_LOGOGEN
  17. #@@# Moved file 0 to file 2: SARE_HTML_URI_OFF
  18. #@@# Moved file 0 to file 2: SARE_HTML_USL_B7
  19. #@@# Moved file 0 to file 2: SARE_HTML_USL_B9
  20. #@@# Moved file 0 to file 2: SARE_PHISH_HTML_01
  21. #@@# 01.03.10: June 3 2006
  22. #@@# Minor score tweaks based on recent mass-checks
  23. #@@# Moved file 1 to 2: SARE_HTML_BR_MANY
  24. #@@# Moved file 1 to 2: SARE_HTML_ONE_LINE2
  25. #@@# Moved file 1 to 2: SARE_HTML_URI_OC
  26. # License: Artistic - see http://www.rulesemporium.com/license.txt
  27. # Current Maintainer: Bob Menschel - RMSA@Menschel.net
  28. # Current Home: http://www.rulesemporium.com/rules/70_sare_html2.cf
  29. #
  30. ######## ###################### ##################################################
  31. body __SARE_HTML_HAS_A eval:html_tag_exists('a') # body-type eval, matching how stock SpamAssassin HTML tests declare html_tag_exists
  32. body __SARE_HTML_HAS_BR eval:html_tag_exists('br')
  33. body __SARE_HTML_HAS_DIV eval:html_tag_exists('div')
  34. body __SARE_HTML_HAS_FONT eval:html_tag_exists('font')
  35. body __SARE_HTML_HAS_IMG eval:html_tag_exists('img')
  36. body __SARE_HTML_HAS_P eval:html_tag_exists('p')
  37. body __SARE_HTML_HAS_PRE eval:html_tag_exists('pre')
  38. body __SARE_HTML_HAS_TITLE eval:html_tag_exists('title')
  39. rawbody __SARE_HTML_HBODY m'<html><body>'i
  40. rawbody __SARE_HTML_BEHTML m'<body></html>'i
  41. rawbody __SARE_HTML_BEHTML2 m'^</?body></html>'i
  42. rawbody __SARE_HTML_EFONT m'^</font>'i
  43. rawbody __SARE_HTML_EHEB m'^</html></body>'i
  44. rawbody __SARE_HTML_CMT_CNTR /<center><!--/
  45. ######## ###################### ##################################################
  46. # <HTML> and <BODY> tag spamsign
  47. ######## ###################### ##################################################
  48. rawbody SARE_HTML_EHTML_OBFU m'<\s*/\s+(?!html)[HTmL\s]{4,}>'i
  49. describe SARE_HTML_EHTML_OBFU Phoney tag
  50. score SARE_HTML_EHTML_OBFU 1.111
  51. #stype SARE_HTML_EHTML_OBFU spamp
  52. #hist SARE_HTML_EHTML_OBFU Loren Wilton, June 2005
  53. #counts SARE_HTML_EHTML_OBFU 0s/0h of 333405 corpus (262498s/70907h RM) 05/12/06
  54. #max SARE_HTML_EHTML_OBFU 30s/0h of 619677 corpus (318875s/300802h RM) 09/11/05
  55. #counts SARE_HTML_EHTML_OBFU 0s/0h of 11260 corpus (6568s/4692h CT) 06/17/05
  56. #counts SARE_HTML_EHTML_OBFU 0s/0h of 6804 corpus (1336s/5468h ft) 06/17/05
  57. #counts SARE_HTML_EHTML_OBFU 21s/0h of 54067 corpus (16890s/37177h JH-3.01) 06/18/05
  58. #counts SARE_HTML_EHTML_OBFU 0s/0h of 23068 corpus (17346s/5722h MY) 05/14/06
  59. #max SARE_HTML_EHTML_OBFU 34s/0h of 57287 corpus (52272s/5015h MY) 09/22/05
  60. ######## ###################### ##################################################
  61. # Spamsign character sets and fonts
  62. ######## ###################### ##################################################
  63. rawbody SARE_HTML_COLOR_D /(?:style="?|<style[^>]*>)[^>"]*[^-]color\s*:\s*rgb\(\s*(?:100|9[0-9]|8[6-9])\s*%\s*,\s*(?:100|9[0-9]|8[6-9])\s*%\s*,\s*(?:100|9[0-9]|8[6-9])\s*%\s*\)[^>]*>/i
  64. describe SARE_HTML_COLOR_D BAD STYLE: color: too light (rgb(%))
  65. score SARE_HTML_COLOR_D 0.100
  66. #hist SARE_HTML_COLOR_D From Jesse Houwing May 14 2004
  67. #counts SARE_HTML_COLOR_D 0s/0h of 98435 corpus (76828s/21607h RM) 05/14/04
  68. #counts SARE_HTML_COLOR_D 0s/0h of 29365 corpus (5882s/23483h JH) 08/14/04 TM2 SA3.0-pre2
  69. rawbody SARE_HTML_POB1200 /width="599" bgColor="\#9999FF"/i
  70. describe SARE_HTML_POB1200 Used by POB1200 Orangestad spammer
  71. score SARE_HTML_POB1200 1.666
  72. #stype SARE_HTML_POB1200 spamp
  73. #hist SARE_HTML_POB1200 Jennifer Wheeler <jennifer.sare@nxtek.net> May 17 2004
  74. #counts SARE_HTML_POB1200 0s/0h of 196681 corpus (96193s/100488h RM) 02/22/05
  75. #max SARE_HTML_POB1200 414s/0h of 114422 corpus (81069s/33353h RM) 01/16/05
  76. #counts SARE_HTML_POB1200 1s/0h of 54067 corpus (16890s/37177h JH-3.01) 06/18/05
  77. #max SARE_HTML_POB1200 18s/0h of 38858 corpus (15368s/23490h JH-SA3.0rc1) 08/22/04
  78. #counts SARE_HTML_POB1200 0s/0h of 57287 corpus (52272s/5015h MY) 09/22/05
  79. #max SARE_HTML_POB1200 42s/0h of 18153 corpus (15872s/2281h MY) 05/18/04
  80. #counts SARE_HTML_POB1200 0s/0h of 10826 corpus (6364s/4462h CT) 05/28/05
  81. ######## ###################### ##################################################
  82. # <FRAME> Tag Tests
  83. ######## ###################### ##################################################
  84. rawbody SARE_HTML_NOFRAMES /<frame><noframes>\w*<\/noframes><\/frame>/i
  85. describe SARE_HTML_NOFRAMES Body appears to hide anti-anti-spam text in frame
  86. score SARE_HTML_NOFRAMES 1.000
  87. #counts SARE_HTML_NOFRAMES 0s/0h of 98542 corpus (76935s/21607h RM) 05/12/04
  88. #max SARE_HTML_NOFRAMES 96 spam, 0 ham, Sep 5 2003
  89. #counts SARE_HTML_NOFRAMES 0s/0h of 29365 corpus (5882s/23483h JH) 08/14/04 TM2 SA3.0-pre2
  90. ######## ###################### ##################################################
  91. # Invalid or Suspicious URI Tests
  92. ######## ###################### ##################################################
  93. rawbody SARE_HTML_URI_GBYE />Good Bye<\/a>/i
  94. describe SARE_HTML_URI_GBYE text has URL to spammer's unsubscribe link
  95. score SARE_HTML_URI_GBYE 0.100
  96. #counts SARE_HTML_URI_GBYE 0s/0h of 98542 corpus (76935s/21607h RM) 05/12/04
  97. #counts SARE_HTML_URI_GBYE 0s/0h of 29365 corpus (5882s/23483h JH) 08/14/04 TM2 SA3.0-pre2
  98. #overlap SARE_HTML_URI_HIDADD Overlaps completely within SARE_HTML_P_BREAK 2004-06-11
  99. rawbody SARE_HTML_URI_HIDADD /(?:\&\~c\&o\&m|\&\~n\&e\&t)/i
  100. describe SARE_HTML_URI_HIDADD URI with obfuscated destination
  101. score SARE_HTML_URI_HIDADD 1.666
  102. #stype SARE_HTML_URI_HIDADD spamp
  103. #hist SARE_HTML_URI_HIDADD Fred T: FR_HIDDEN_ADDY
  104. #overlap SARE_HTML_URI_HIDADD Overlaps completely within SARE_HTML_P_BREAK 2004-06-11
  105. #counts SARE_HTML_URI_HIDADD 0s/0h of 333405 corpus (262498s/70907h RM) 05/12/06
  106. #max SARE_HTML_URI_HIDADD 817s/0h of 400504 corpus (178155s/222349h RM) 03/31/05
  107. #counts SARE_HTML_URI_HIDADD 0s/0h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
  108. #max SARE_HTML_URI_HIDADD 2s/0h of 32260 corpus (8983s/23277h JH) 05/14/04
  109. #counts SARE_HTML_URI_HIDADD 0s/0h of 23068 corpus (17346s/5722h MY) 05/14/06
  110. #max SARE_HTML_URI_HIDADD 1s/0h of 47221 corpus (42968s/4253h MY) 06/18/05
  111. #counts SARE_HTML_URI_HIDADD 0s/0h of 10629 corpus (5847s/4782h CT) 09/18/05
  112. uri SARE_HTML_URI_HIDE1 /:ac=[A-Z,a-z,0-9,@,!,;]+/ # NOTE(review): the commas inside [...] are literal, so ',' is accepted too - confirm that is intended
  113. describe SARE_HTML_URI_HIDE1 URI attempts to hide destination domain
  114. score SARE_HTML_URI_HIDE1 0.100
  115. #counts SARE_HTML_URI_HIDE1 0s/0h of 98542 corpus (76935s/21607h RM) 05/12/04
  116. #counts SARE_HTML_URI_HIDE1 0s/0h of 29365 corpus (5882s/23483h JH) 08/14/04 TM2 SA3.0-pre2
  117. uri SARE_HTML_URI_LOGOGEN m{/logogen\.img\?}i
  118. score SARE_HTML_URI_LOGOGEN 1.666
  119. describe SARE_HTML_URI_LOGOGEN Uses some logo generation software
  120. #hist SARE_HTML_URI_LOGOGEN Jesse Houwing, Aug 19 2004
  121. #counts SARE_HTML_URI_LOGOGEN 0s/0h of 175738 corpus (98979s/76759h RM) 02/14/05
  122. #max SARE_HTML_URI_LOGOGEN 6s/0h of 65858 corpus (40621s/25237h RM) 08/19/04
  123. #counts SARE_HTML_URI_LOGOGEN 319s/0h of 54067 corpus (16890s/37177h JH-3.01) 06/18/05
  124. #max SARE_HTML_URI_LOGOGEN 453s/0h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
  125. #counts SARE_HTML_URI_LOGOGEN 0s/0h of 47221 corpus (42968s/4253h MY) 06/18/05
  126. #max SARE_HTML_URI_LOGOGEN 48s/0h of 18647 corpus (16116s/2531h MY) 08/25/04
  127. #counts SARE_HTML_URI_LOGOGEN 0s/0h of 11260 corpus (6568s/4692h CT) 06/17/05
  128. #max SARE_HTML_URI_LOGOGEN 7s/0h of 10826 corpus (6364s/4462h CT) 05/28/05
  129. uri SARE_HTML_URI_OC /\?oc=\d{4,10}/
  130. describe SARE_HTML_URI_OC Possible spammer sign in URL
  131. score SARE_HTML_URI_OC 1.666
  132. #hist SARE_HTML_URI_OC LW_URI_OC
  133. #counts SARE_HTML_URI_OC 0s/0h of 689155 corpus (348140s/341015h RM) 09/18/05
  134. #max SARE_HTML_URI_OC 440s/0h of 89461 corpus (67464s/21997h RM) 05/29/04
  135. #counts SARE_HTML_URI_OC 0s/0h of 54067 corpus (16890s/37177h JH-3.01) 06/18/05
  136. #max SARE_HTML_URI_OC 17s/0h of 38858 corpus (15368s/23490h JH-SA3.0rc1) 08/22/04
  137. #counts SARE_HTML_URI_OC 0s/0h of 26326 corpus (22886s/3440h MY) 02/15/05
  138. #max SARE_HTML_URI_OC 85s/0h of 13454 corpus (11339s/2115h MY) 06/02/04
  139. uri SARE_HTML_URI_OFF /http.{5,35}\boff\.(?:htm|html|php|asp|pl|cgi|jsp)\b/i
  140. describe SARE_HTML_URI_OFF URI to page name which suggests spammer's page
  141. score SARE_HTML_URI_OFF 2.222
  142. #hist SARE_HTML_URI_OFF FR_PAGE_OFF
  143. #counts SARE_HTML_URI_OFF 0s/0h of 333405 corpus (262498s/70907h RM) 05/12/06
  144. #max SARE_HTML_URI_OFF 2619s/0h of 109180 corpus (88746s/20434h RM) 04/09/04
  145. #counts SARE_HTML_URI_OFF 2s/0h of 54067 corpus (16890s/37177h JH-3.01) 06/18/05
  146. #max SARE_HTML_URI_OFF 89s/0h of 32260 corpus (8983s/23277h JH) 05/14/04
  147. #counts SARE_HTML_URI_OFF 0s/0h of 26326 corpus (22886s/3440h MY) 02/15/05
  148. #counts SARE_HTML_URI_OFF 0s/0h of 10826 corpus (6364s/4462h CT) 05/28/05
  149. #max SARE_HTML_URI_OFF 39s/0h of 6944 corpus (3188s/3756h CT) 05/19/04
  150. ######## ###################### ##################################################
  151. # Header tags
  152. ######## ###################### ##################################################
  153. rawbody SARE_HTML_HEAD_AFFIL /\<h[0-9]\>.{2,30}\/.{1,3}affiliate.{1,20}\<\/h[0-9]\>/i
  154. describe SARE_HTML_HEAD_AFFIL Affiliate in BOLD
  155. score SARE_HTML_HEAD_AFFIL 0.744
  156. #hist SARE_HTML_HEAD_AFFIL Matt Yackley, Apr 15 2005
  157. #counts SARE_HTML_HEAD_AFFIL 0s/0h of 619677 corpus (318875s/300802h RM) 09/11/05
  158. #max SARE_HTML_HEAD_AFFIL 23s/0h of 292246 corpus (119174s/173072h RM) 04/15/05
  159. #counts SARE_HTML_HEAD_AFFIL 0s/0h of 13290 corpus (7418s/5872h CT) 05/14/06
  160. #max SARE_HTML_HEAD_AFFIL 1s/0h of 10826 corpus (6364s/4462h CT) 05/28/05
  161. #counts SARE_HTML_HEAD_AFFIL 0s/0h of 54067 corpus (16890s/37177h JH-3.01) 06/18/05
  162. #counts SARE_HTML_HEAD_AFFIL 0s/0h of 23068 corpus (17346s/5722h MY) 05/14/06
  163. #max SARE_HTML_HEAD_AFFIL 10s/0h of 47221 corpus (42968s/4253h MY) 06/18/05
  164. ######## ###################### ##################################################
  165. # Suspicious tag combinations
  166. ######## ###################### ##################################################
  167. rawbody SARE_HTML_ONE_LINE2 m'<body><p><a href="http://\w+\.\w+\.info/\?[\w\.]+"><IMG SRC="cid:[\w\@\.]+" border="0" ALT=""></a>'
  168. describe SARE_HTML_ONE_LINE2 standard spam formatting
  169. score SARE_HTML_ONE_LINE2 1.111
  170. #stype SARE_HTML_ONE_LINE2 spamp
  171. #hist SARE_HTML_ONE_LINE2 Loren Wilton, LW_SINGLELINE4 Sep 5 2004
  172. #counts SARE_HTML_ONE_LINE2 0s/0h of 281655 corpus (110173s/171482h RM) 05/05/05
  173. #max SARE_HTML_ONE_LINE2 22s/0h of 114422 corpus (81069s/33353h RM) 01/16/05
  174. #counts SARE_HTML_ONE_LINE2 1s/0h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
  175. #counts SARE_HTML_ONE_LINE2 0s/0h of 57287 corpus (52272s/5015h MY) 09/22/05
  176. #max SARE_HTML_ONE_LINE2 5s/0h of 26326 corpus (22886s/3440h MY) 02/15/05
  177. full SARE_HTML_ONE_LINE3 m'\n<html><body>\n<center>.{0,140}</center>\n</body></html>\n'
  178. describe SARE_HTML_ONE_LINE3 Another single-line centered HTML message
  179. score SARE_HTML_ONE_LINE3 1.256
  180. #hist SARE_HTML_ONE_LINE3 Loren Wilton: LW_SINGLELINE4
  181. #counts SARE_HTML_ONE_LINE3 0s/0h of 281271 corpus (109792s/171479h RM) 05/05/05
  182. #max SARE_HTML_ONE_LINE3 64s/0h of 70245 corpus (42816s/27429h RM) 10/02/04
  183. #counts SARE_HTML_ONE_LINE3 61s/0h of 54969 corpus (17793s/37176h JH-3.01) 03/13/05
  184. #counts SARE_HTML_ONE_LINE3 0s/0h of 19447 corpus (16862s/2585h MY) 10/06/04
  185. #counts SARE_HTML_ONE_LINE3 0s/0h of 11260 corpus (6568s/4692h CT) 06/17/05
  186. #max SARE_HTML_ONE_LINE3 1s/0h of 10826 corpus (6364s/4462h CT) 05/28/05
  187. rawbody SARE_HTML_LEAKTHRU1 m'^<BODY><p><(\w+)></(?:\1)><A href=\"[^"]+\"><(\w+)></(?:\2)>$'
  188. score SARE_HTML_LEAKTHRU1 1.111
  189. #stype SARE_HTML_LEAKTHRU1 spamp
  190. #hist SARE_HTML_LEAKTHRU1 Loren Wilton: LW_LEAKTHRU
  191. describe SARE_HTML_LEAKTHRU1 Another image-only spam
  192. #counts SARE_HTML_LEAKTHRU1 0s/0h of 619677 corpus (318875s/300802h RM) 09/11/05
  193. #max SARE_HTML_LEAKTHRU1 72s/0h of 196642 corpus (96193s/100449h RM) 02/22/05
  194. #counts SARE_HTML_LEAKTHRU1 0s/0h of 54969 corpus (17793s/37176h JH-3.01) 03/13/05
  195. #counts SARE_HTML_LEAKTHRU1 0s/0h of 23068 corpus (17346s/5722h MY) 05/14/06
  196. #max SARE_HTML_LEAKTHRU1 22s/0h of 31513 corpus (27912s/3601h MY) 03/09/05
  197. #counts SARE_HTML_LEAKTHRU1 0s/0h of 11260 corpus (6568s/4692h CT) 06/17/05
  198. rawbody SARE_HTML_LEAKTHRU2 m'^<BODY><p><(\w+)(?:\s[\w\=]+)?></(?:\1)><A href=\"[^"]+\"><(\w+)(?:\s[\w\=]+)?></(?:\2)>$'
  199. score SARE_HTML_LEAKTHRU2 1.666
  200. #stype SARE_HTML_LEAKTHRU2 spamp
  201. #hist SARE_HTML_LEAKTHRU2 Loren Wilton: LW_LEAKTHRU1
  202. describe SARE_HTML_LEAKTHRU2 Another image-only spam
  203. #counts SARE_HTML_LEAKTHRU2 0s/0h of 619677 corpus (318875s/300802h RM) 09/11/05
  204. #max SARE_HTML_LEAKTHRU2 178s/0h of 283600 corpus (129945s/153655h RM) 03/08/05
  205. #counts SARE_HTML_LEAKTHRU2 0s/0h of 54969 corpus (17793s/37176h JH-3.01) 03/13/05
  206. #counts SARE_HTML_LEAKTHRU2 0s/0h of 23068 corpus (17346s/5722h MY) 05/14/06
  207. #max SARE_HTML_LEAKTHRU2 48s/0h of 31513 corpus (27912s/3601h MY) 03/09/05
  208. #counts SARE_HTML_LEAKTHRU2 0s/0h of 11260 corpus (6568s/4692h CT) 06/17/05
  209. ######## ###################### ##################################################
  210. # Useless tags (tag structures that do nothing)
  211. # Largely submitted by Matt Yackley, with contributions by
  212. # Carl Friend, Jennifer Wheeler, Scott Sprunger, Larry Gilson
  213. ######## ###################### ##################################################
  214. rawbody SARE_HTML_USL_B7 /(<b><\/b>.{1,5}){7,8}/i # unanchored, so a run longer than 8 repeats still matches
  215. describe SARE_HTML_USL_B7 Multiple <b></b> (7-8)
  216. score SARE_HTML_USL_B7 0.100
  217. #counts SARE_HTML_USL_B7 0s/0h of 333405 corpus (262498s/70907h RM) 05/12/06
  218. #max SARE_HTML_USL_B7 105s/0h of 689155 corpus (348140s/341015h RM) 09/18/05
  219. #counts SARE_HTML_USL_B7 0s/0h of 29365 corpus (5882s/23483h JH) 08/14/04 TM2 SA3.0-pre2
  220. #counts SARE_HTML_USL_B7 0s/0h of 57287 corpus (52272s/5015h MY) 09/22/05
  221. rawbody SARE_HTML_USL_B9 /(<b><\/b>.{1,5}){9,10}/i # unanchored, so a run longer than 10 repeats still matches
  222. describe SARE_HTML_USL_B9 Multiple <b></b> (9-10)
  223. score SARE_HTML_USL_B9 0.100
  224. #counts SARE_HTML_USL_B9 0s/0h of 333405 corpus (262498s/70907h RM) 05/12/06
  225. #max SARE_HTML_USL_B9 99s/0h of 689155 corpus (348140s/341015h RM) 09/18/05
  226. #counts SARE_HTML_USL_B9 0s/0h of 29365 corpus (5882s/23483h JH) 08/14/04 TM2 SA3.0-pre2
  227. #counts SARE_HTML_USL_B9 0s/0h of 57287 corpus (52272s/5015h MY) 09/22/05
  228. ######## ###################### ##################################################
  229. # <tag ... ALT= ...> tag tests
  230. ######## ###################### ##################################################
  231. ######## ###################### ##################################################
  232. # <!-- Comment tag tests
  233. ######## ###################### ##################################################
  234. rawbody SARE_HTML_CMT_MONEY /<\!--\${1,10}-->/i
  235. describe SARE_HTML_CMT_MONEY HTML Comment seems to mention money
  236. score SARE_HTML_CMT_MONEY 0.100
  237. #counts SARE_HTML_CMT_MONEY 0s/0h of 98542 corpus (76935s/21607h RM) 05/12/04
  238. #counts SARE_HTML_CMT_MONEY 0s/0h of 29365 corpus (5882s/23483h JH) 08/14/04 TM2 SA3.0-pre2
  239. ######## ###################### ##################################################
  240. # Image tag tests
  241. ######## ###################### ##################################################
  242. rawbody SARE_HTML_GIF_NUM /\.gif\d{2,}/i
  243. describe SARE_HTML_GIF_NUM HTML contains tracking numbers after .gif
  244. score SARE_HTML_GIF_NUM 0.100
  245. #counts SARE_HTML_GIF_NUM 0s/0h of 98542 corpus (76935s/21607h RM) 05/12/04
  246. #counts SARE_HTML_GIF_NUM 0s/0h of 29365 corpus (5882s/23483h JH) 08/14/04 TM2 SA3.0-pre2
  247. ######## ###################### ##################################################
  248. # Paragraphs, breaks, and spacings
  249. ######## ###################### ##################################################
  250. rawbody SARE_HTML_BR_MANY /(?:<br>){5}/i # tag is grouped: a bare {5} repeated only the closing '>'
  251. describe SARE_HTML_BR_MANY Too many sequential identical HTML tags
  252. score SARE_HTML_BR_MANY 0.555
  253. #stype SARE_HTML_BR_MANY spamp
  254. #counts SARE_HTML_BR_MANY 0s/0h of 689155 corpus (348140s/341015h RM) 09/18/05
  255. #max SARE_HTML_BR_MANY 2s/0h of 258858 corpus (114246s/144612h RM) 05/27/05
  256. #counts SARE_HTML_BR_MANY 0s/0h of 29365 corpus (5882s/23483h JH) 08/14/04 TM2 SA3.0-pre2
  257. #counts SARE_HTML_BR_MANY 0s/0h of 54067 corpus (16890s/37177h JH-3.01) 06/18/05
  258. #counts SARE_HTML_BR_MANY 0s/0h of 47221 corpus (42968s/4253h MY) 06/18/05
  259. rawbody __SARE_HTML_MANY_BR05 /<br>\s*<br>\s*<br>\s*<br>\s*<br>\s*<br>/i # six <br> tags with optional whitespace between them
  260. meta SARE_HTML_MANY_BR05 __SARE_HTML_MANY_BR05 && HTML_MESSAGE # fires only when HTML_MESSAGE also hit
  261. describe SARE_HTML_MANY_BR05 Tooo many <br>'s!
  262. score SARE_HTML_MANY_BR05 0.500
  263. #hist SARE_HTML_MANY_BR05 Contrib by Matt Keller June 7 2004
  264. #note SARE_HTML_MANY_BR05 Removing the HTML_MESSAGE test increases spam hits by 4% but doubles ham hits
  265. #hist SARE_HTML_MANY_BR05 this and SARE_HTML_MANY_BR10 obsolete SARE_HTML_TD_BR4 = FR_WICKED_SPAM_??
  266. #counts SARE_HTML_MANY_BR05 0s/0h of 114422 corpus (81069s/33353h RM) 01/16/05
  267. #alone SARE_HTML_MANY_BR05 2051s/43h of 66351 corpus (40971s/25380h RM) 08/21/04
  268. #counts SARE_HTML_MANY_BR05 0s/0h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
  269. #max SARE_HTML_MANY_BR05 755s/2h of 38858 corpus (15368s/23490h JH-SA3.0rc1) 08/22/04
  270. #counts SARE_HTML_MANY_BR05 0s/0h of 26326 corpus (22886s/3440h MY) 02/15/05
  271. ######## ###################### ##################################################
  272. # Javascript and object tests
  273. ######## ###################### ##################################################
  274. rawbody SARE_HTML_JVS_POPUP /<body onload \= \"window\.open/i
  275. describe SARE_HTML_JVS_POPUP Bad HTML form. Tries to load a javascript pop up.
  276. score SARE_HTML_JVS_POPUP 0.100
  277. #counts SARE_HTML_JVS_POPUP 0s/0h of 98542 corpus (76935s/21607h RM) 05/12/04
  278. #counts SARE_HTML_JVS_POPUP 0s/0h of 29365 corpus (5882s/23483h JH) 08/14/04 TM2 SA3.0-pre2
  279. ######## ###################### ##################################################
  280. # Tests destined for other rule sets
  281. ######## ###################### ##################################################
  282. full __SARE_PHISH_HTML_01a m*<a[^<]{0,60} onMouseMove=(?:3D)?"window\.status=(?:3D)?'https?://* # dot escaped so only a literal "window.status" matches
  283. rawbody __SARE_PHISH_HTML_01b m*<a[^<]{0,60} onMouseMove=(?:3D)?"window\.status=(?:3D)?'https?://* # same pattern, run against the rawbody text
  284. meta SARE_PHISH_HTML_01 __SARE_PHISH_HTML_01a || __SARE_PHISH_HTML_01b
  285. describe SARE_PHISH_HTML_01 Hiding actual site with fake secure site!
  286. score SARE_PHISH_HTML_01 2.500
  287. #stype SARE_PHISH_HTML_01 spamgg # phish
  288. #hist SARE_PHISH_HTML_01 Loren Wilton: LW_MOUSEMOVE
  289. #counts SARE_PHISH_HTML_01 1s/0h of 619677 corpus (318875s/300802h RM) 09/11/05
  290. #max SARE_PHISH_HTML_01 17s/0h of 70245 corpus (42816s/27429h RM) 10/02/04
  291. #counts SARE_PHISH_HTML_01 2s/0h of 54067 corpus (16890s/37177h JH-3.01) 06/18/05
  292. #max SARE_PHISH_HTML_01 5s/0h of 54969 corpus (17793s/37176h JH-3.01) 03/13/05
  293. #counts SARE_PHISH_HTML_01 0s/0h of 47221 corpus (42968s/4253h MY) 06/18/05
  294. #max SARE_PHISH_HTML_01 6s/0h of 19447 corpus (16862s/2585h MY) 10/06/04
  295. #counts SARE_PHISH_HTML_01 0s/0h of 11260 corpus (6568s/4692h CT) 06/17/05
  296. # EOF