70_sare_adult.cf 53 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914
  1. # SARE "Adult" Ruleset for SpamAssassin
  2. # Version: 01.02.08 # The Adult set has been renamed to match SARE's updated standards, the new name is 70_sare_adult.cf
  3. # Created: 2004-03-23
  4. # Modified: 2007-05-21
  5. # Changes: Fixed broken meta
  6. # License: Artistic - see http://www.rulesemporium.com/license.txt
  7. # Current Maintainer: Matt Yackley - adult@rulesemporium.com
  8. # Maintainer: Doc Schneider - maddoc@maddoc.net
  9. # Current Home: http://www.rulesemporium.com/rules/70_sare_adult.cf
  10. #
  11. ####
  12. ###############################
  13. # subject rules #
  14. ###############################
  15. header SARE_SUBJ_SLUT Subject =~ /\bslut\b/i
  16. score SARE_SUBJ_SLUT 1.66
  17. #counts SARE_SUBJ_SLUT 89s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  18. #counts SARE_SUBJ_SLUT 5s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  19. header __FPS_BREAST Subject =~ /\bbreasts?\b/i
  20. header __FPS_COCK Subject =~ /\bcock\b/i
  21. header __FPS_FUCK Subject =~ /\bfuck/i
  22. header __FPS_GIRLS Subject =~ /\bgirls\b/i
  23. header __FPS_HARDCORE Subject =~ /\bhard.?core\b/i
  24. header __FPS_LITTLE Subject =~ /\blittle\b/i
  25. header __FPS_MODEL Subject =~ /\bmodels?\b/i
  26. header __FPS_NAKED Subject =~ /\bnaked\b/i
  27. header __FPS_PENETRAT Subject =~ /\bpenetration\b/i
  28. header __FPS_SEX Subject =~ /\bsex\b/i
  29. header __FPS_SLUT Subject =~ /\bslut\b/i
  30. header __FPS_TEEN Subject =~ /\bteen\b/i
  31. header __FPS_VIRGIN Subject =~ /\bvirgins?\b/i
  # Tally how many of the __FPS_* subject keyword sub-rules matched. Each matching
  # sub-rule adds 1 to the sum, so "> 1" means at least two different keywords,
  # "> 2" at least three, and "> 3" at least four.
  32. meta __COUNT_FPORN2 (__FPS_BREAST + __FPS_COCK + __FPS_FUCK + __FPS_GIRLS + __FPS_HARDCORE + __FPS_LITTLE + __FPS_MODEL + __FPS_NAKED + __FPS_PENETRAT + __FPS_SEX + __FPS_SLUT + __FPS_TEEN + __FPS_VIRGIN) > 1
  33. meta __COUNT_FPORN3 (__FPS_BREAST + __FPS_COCK + __FPS_FUCK + __FPS_GIRLS + __FPS_HARDCORE + __FPS_LITTLE + __FPS_MODEL + __FPS_NAKED + __FPS_PENETRAT + __FPS_SEX + __FPS_SLUT + __FPS_TEEN + __FPS_VIRGIN) > 2
  34. meta __COUNT_FPORN4 (__FPS_BREAST + __FPS_COCK + __FPS_FUCK + __FPS_GIRLS + __FPS_HARDCORE + __FPS_LITTLE + __FPS_MODEL + __FPS_NAKED + __FPS_PENETRAT + __FPS_SEX + __FPS_SLUT + __FPS_TEEN + __FPS_VIRGIN) > 3
  # Fires when exactly two keywords are present (two or more, but not three or more).
  35. meta SARE_SUB_MULTI_PRN2 (__COUNT_FPORN2 && !__COUNT_FPORN3)
  36. score SARE_SUB_MULTI_PRN2 1.66
  37. #counts SARE_SUB_MULTI_PRN2 455s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  38. #counts SARE_SUB_MULTI_PRN2 93s/5h of 140226 corpus (90162s/50064h DOC) 04/19/06
  # Fires when three or more keywords are present. The earlier "&& !__COUNT_FPORN4"
  # exclusion was removed: the SARE_SUB_MULTI_PRN4 rule that scored the four-or-more
  # case is commented out, so with the exclusion in place such subjects matched
  # neither SARE_SUB_MULTI_PRN2 nor SARE_SUB_MULTI_PRN3 and scored nothing here.
  # If SARE_SUB_MULTI_PRN4 is ever re-enabled, restore the exclusion to avoid
  # double scoring.
  39. meta SARE_SUB_MULTI_PRN3 (__COUNT_FPORN3)
  40. score SARE_SUB_MULTI_PRN3 1.66
  41. #counts SARE_SUB_MULTI_PRN3 93s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  42. #counts SARE_SUB_MULTI_PRN3 9s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  43. #meta SARE_SUB_MULTI_PRN4 (__COUNT_FPORN4)
  44. #score SARE_SUB_MULTI_PRN4 3.333
  45. #counts SARE_SUB_MULTI_PRN4 4s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  46. #counts SARE_SUB_MULTI_PRN4 0s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  47. header SARE_ADLTSUB1 Subject =~ /\b(?:adu?1t|amb[1!]en|b0y|bl0w|c0cks?|c0re|ejaculation|f?r0+m|g(?:[1!]r[1l]|ir[!1])|h0t|ntercourse|jerk off|l1ttle|m0vie|manh00d|[0o]rg\@sm|p1ct|pen[1!]s|(?:ph|f)(?:[0\@]t|ot[0\@])|secks|sm00th|t1ny|t1ts|v(?:irg1|1rgi|1rg1)n|v[i1]de0|violenced|y0ung)/i
  48. describe SARE_ADLTSUB1 Contains OBFU and "strong" adult words
  49. score SARE_ADLTSUB1 1.66
  50. # Combined from M_K_PORN_BOGOSITY_SUBJ, L_s_porn, SUBJECT_XXX, RM_swp_porn4, RM_swp_porn5
  51. # 266s/0h of 119325 corpus (98981s/20344h) 03/21/04
  52. # 45s/0h of 15929 corpus (13729s/2200h) 03/23/04
  53. #counts SARE_ADLTSUB1 503s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  54. #counts SARE_ADLTSUB1 145s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  # Subject contains one of the listed adult words as a whole word (case-insensitive).
  # "inter+acial" accepts one or more "r", so both "interacial" and "interracial" match.
  # The inner group is non-capturing: nothing reads the capture, and the set of
  # matching subjects is unchanged.
  55. header SARE_ADLTSUB2 Subject =~ /\b(?:blow|climax|enlarg(?:e|ment)|fuck|inter+acial|lick|porn|penis|pervert|pussy|tits|tight|vagina|virgins?)\b/i
  56. describe SARE_ADLTSUB2 Contains possible adult words
  57. score SARE_ADLTSUB2 1.23
  58. # Combined from SUBJECT_XXX_2, L_s_porn, RM_swp_pervert, RM_swp_porn1, RM_swp_porn2
  59. # 519s/1h of 119325 corpus (98981s/20344h) 03/21/04
  60. # 58s/0h of 15929 corpus (13729s/2200h) 03/23/04
  61. #counts SARE_ADLTSUB2 1967s/2h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  62. #counts SARE_ADLTSUB2 514s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  63. header SARE_ADLTSUB3 Subject =~ /(?!\bporn)(?:\bp|\B(?:[\xDE]|\xCE\xA1|\xCF\x81|\xD0\xA0|\xD1\x80))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[r\xAE]|\xC5[\x94-\x99]|\xD1\x93)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)/i
  64. describe SARE_ADLTSUB3 Apparent spam seems to contain porn subject
  65. score SARE_ADLTSUB3 1.66 # type=obfu
  66. # Original name: RM_swp_porn1o1
  67. # 58s/0h of 119325 corpus (98981s/20344h) 03/21/04
  68. # 11s/0h of 15929 corpus (13729s/2200h) 03/23/04
  69. #counts SARE_ADLTSUB3 11s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  70. #counts SARE_ADLTSUB3 15s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  # Matches a word start followed by p, o, r, n with at most one arbitrary character
  # allowed after each of p, o and r (case-insensitive). The leading negative
  # lookahead skips positions where the text literally begins "porn" or "pour",
  # so only spaced-out or obfuscated spellings are hit.
  71. header SARE_ADLTSUB4 Subject =~ /(?!\bpo(?:rn|ur))\bp.?o.?r.?n/i
  72. describe SARE_ADLTSUB4 Apparent spam seems to contain porn subject
  73. score SARE_ADLTSUB4 0.89 # type=obfu
  74. # Original name: RM_swp_porn1o2
  75. # 26s/0h of 119325 corpus (98981s/20344h) 03/21/04
  76. # 3s/0h of 15929 corpus (13729s/2200h) 03/23/04
  77. #counts SARE_ADLTSUB4 5s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  78. #counts SARE_ADLTSUB4 5s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  # Obfuscated four-letter word in the Subject: each letter position accepts the plain
  # ASCII letter or one of the listed look-alike byte sequences, with at most one
  # separator byte between positions. The leading lookahead skips the plain spelling.
  # A stray "]" after the final character class was removed; with it, the last
  # alternative for the "k" position only matched when followed by a literal "]".
  79. header SARE_ADLTSUB5 Subject =~ /(?!\bfuck)(?:\bf|\B(?:\xC5\xBF|\xC6\x92|\xD2[\x92-\x93]))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[uv\*\xB5\xD9-\xDC\xF9-\xFC]|\xC5[\xA8-\xB3]|\xC6[\xAF-\xB0]|\xC7[\x93-\x9C]|\xCE\xB0|\xCE\xBC|\xCF\x8B|\xCF\x8D|\xD4\xB1|\xD5\x84|\xD5\x8D)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[c\*\xC7\xE7\xA2\xA9]|\xC4[\x86-\x8D]|\xD0\xA1|\xD1\x81)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:k|\xC4[\xB6-\xB8]|\xCE\x9A|\xCE\xBA|\xD0\x8C|\xD0\x9A|\xD0\xBA|\xD1\x9C|\xD2[\x9A-\x9D])/i
  80. describe SARE_ADLTSUB5 Apparent spam seems to contain porn subject
  81. score SARE_ADLTSUB5 1.66 # type=obfu
  82. # Original name: RM_swp_porn2o1
  83. # 8s/0h of 119325 corpus (98981s/20344h) 03/21/04
  84. # 4s/0h of 15929 corpus (13729s/2200h) 03/23/04
  85. #counts SARE_ADLTSUB5 12s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  86. #counts SARE_ADLTSUB5 0s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  # Matches a word start followed by f, u, c, k with at most one arbitrary character
  # allowed after each of f, u and c (case-insensitive). The leading negative
  # lookahead skips positions where the plain, unobfuscated spelling begins, so only
  # spaced-out or obfuscated spellings are hit.
  87. header SARE_ADLTSUB6 Subject =~ /(?!\bfuck)\bf.?u.?c.?k/i
  88. describe SARE_ADLTSUB6 Apparent spam seems to contain porn subject
  89. score SARE_ADLTSUB6 1.51 # type=obfu
  90. # Original name: RM_swp_porn2o2
  91. # 3s/0h of 119325 corpus (98981s/20344h) 03/21/04
  92. # 5s/0h of 15929 corpus (13729s/2200h) 03/23/04
  93. #counts SARE_ADLTSUB6 32s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  94. #counts SARE_ADLTSUB6 13s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  95. header SARE_ADLTSUB7 Subject =~ /(?!\bpuss(?:y|ies)\b)(?:\bp|\B(?:[\xDE]|\xCE\xA1|\xCF\x81|\xD0\xA0|\xD1\x80))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[uv\*\xB5\xD9-\xDC\xF9-\xFC]|\xC5[\xA8-\xB3]|\xC6[\xAF-\xB0]|\xC7[\x93-\x9C]|\xCE\xB0|\xCE\xBC|\xCF\x8B|\xCF\x8D|\xD4\xB1|\xD5\x84|\xD5\x8D)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[s5\$\xA7]|\xC5[\x9A-\xA1]|\xD0\x85|\xD1\x95|\xD5\x8F)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[s5\$\xA7]|\xC5[\x9A-\xA1]|\xD0\x85|\xD1\x95|\xD5\x8F)(?:(?:[y\xA5\xDD\xFD]|\xC5[\xB6-\xB8]|\xCE\x8E|\xCE\xA5|\xCE\xA8|\xCE\xAB|\xCE\xB3|\xD0\xA3|\xD1\x83|\xD1\x9E|\xD2[\xAE-\xB1])|(?:[il1:\|\*\xCC-\xCF\xEC-\xEF\xA6]|\xC4[\xA8-\xB0]|\xC4\xBA|\xC4\xBC|\xC4\xBE|\xC5\x80|\xC5\x82|\xC7[\x8F-\x90]|\xD0[\x86-\x87]|\xD1[\x96-\x97]|\xCE\x8A|\xCE\x90|\xCE\x99|\xCE\xAA|\xCE\xAF|\xCE\xB9|\xCF\x8A)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[s5\$\xA7]|\xC5[\x9A-\xA1]|\xD0\x85|\xD1\x95|\xD5\x8F))\b/i
  96. describe SARE_ADLTSUB7 Apparent spam seems to contain porn subject
  97. score SARE_ADLTSUB7 1.66 # type=obfu
  98. # Original name: RM_swp_porn5o1
  99. # 4s/0h of 119325 corpus (98981s/20344h) 03/21/04
  100. # 2s/0h of 15929 corpus (13729s/2200h) 03/23/04
  101. #counts SARE_ADLTSUB7 0s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  102. #counts SARE_ADLTSUB7 0s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  # Matches spaced-out or obfuscated spellings: each letter may be followed by at most
  # one arbitrary character. The leading negative lookahead skips the plain spellings
  # and also the innocent phrase "plus sizes" (with or without one separator
  # character), which otherwise satisfies the loose pattern and was a recorded
  # false positive.
  103. header SARE_ADLTSUB8 Subject =~ /(?!\b(?:puss(?:y|ies)|plus\W?sizes)\b)\bp.?u.?s.?s.?(?:y|i.?e.?s)\b/i
  104. describe SARE_ADLTSUB8 Apparent spam seems to contain porn subject
  105. score SARE_ADLTSUB8 1.66 # type=obfu
  106. # Original name: RM_swp_porn5o2
  107. # Former FP for SARE_ADLTSUB8: "plus sizes" (now excluded by the lookahead above)
  108. # 7s/0h of 119325 corpus (98981s/20344h) 03/21/04
  109. # 0s/0h of 15929 corpus (13729s/2200h) 03/23/04
  110. #counts SARE_ADLTSUB8 6s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  111. #counts SARE_ADLTSUB8 6s/2h of 140226 corpus (90162s/50064h DOC) 04/19/06
  112. #header SARE_ADLTSUB10 Subject =~ /(?!\b(?:rap(?:e[sd]?|ing|pel)|reaping)\b)\br.?a.?p.?(?:e.?[sd]?|i.?n.?g)\b/i
  113. #describe SARE_ADLTSUB10 Apparent spam seems to contain porn subject
  114. #score SARE_ADLTSUB10 2.500 # type=obfu
  115. # Original name: RM_swp_Rapeo2
  116. # 20s/0h of 119325 corpus (98981s/20344h) 03/21/04
  117. # 0s/0h of 15929 corpus (13729s/2200h) 03/23/04
  118. #counts SARE_ADLTSUB10 5s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  119. #counts SARE_ADLTSUB10 6s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  120. #header SARE_BEDROOMSEC Subject =~ /bedroom secret/i
  121. #describe SARE_BEDROOMSEC Common spammer phrasing
  122. #score SARE_BEDROOMSEC 0.611
  123. # Original name: RM_spp_BedroomSec
  124. # 10s/0h of 125078 corpus (104890s/20188h) 03/29/04
  125. # 0s/0h of 15929 corpus (13729s/2200h) 03/29/04
  126. #counts SARE_BEDROOMSEC 0s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  127. #counts SARE_BEDROOMSEC 0s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  128. ###############################
  129. # body rules #
  130. ###############################
  131. body FB_SEXOHOL /sexoholics/i
  132. score FB_SEXOHOL 1.66
  133. #counts FB_SEXOHOL 7s/0h of 32370 corpus (24496s/7874h ML) 12/12/05
  134. #counts FB_SEXOHOL 37s/0h of 40658 corpus (35364s/5294h MY) 12/12/05
  135. #counts FB_SEXOHOL 33s/0h of 207630 corpus (200121s/7509h FT) 12/13/05
  136. #counts FB_SEXOHOL 3s/0h of 9809 corpus (4905s/4904h FT) 12/12/05
  137. #counts FB_SEXOHOL 11s/0h of 11532 corpus (6163s/5369h CT) 12/12/05
  138. #counts FB_SEXOHOL 4s/0h of 70031 corpus (30720s/39311h DOC) 12/12/05
  139. #counts FB_SEXOHOL 0s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  140. #counts FB_SEXOHOL 18s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  141. body FB_XUAL /\bxual\b/
  142. score FB_XUAL 0.68
  143. #counts FB_XUAL 20s/0h of 6871 corpus (5500s/1371h AxB) 12/15/05
  144. #counts FB_XUAL 67s/0h of 34342 corpus (25865s/8477h ML) 12/15/05
  145. #counts FB_XUAL 22s/0h of 40631 corpus (35338s/5293h MY) 12/15/05
  146. #counts FB_XUAL 62s/0h of 70858 corpus (31544s/39314h DOC) 12/15/05
  147. #counts FB_XUAL 855s/0h of 107818 corpus (99658s/8160h FVGT) 03/11/06
  148. #counts FB_XUAL 100s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  149. #counts FB_XUAL 360s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  150. #body FB_NOT_SEX / s[^afeiloprsuw]x\b/i
  151. #score FB_NOT_SEX 1.003
  152. #counts FB_NOT_SEX 7s/7h of 37297 corpus (31824s/5473h MY) 02/07/06
  153. #counts FB_NOT_SEX 4s/4h of 6866 corpus (4638s/2228h AxB) 02/07/06
  154. #counts FB_NOT_SEX 4s/5h of 11694 corpus (6132s/5562h CT) 02/07/06
  155. #counts FB_NOT_SEX 204s/4h of 345244 corpus (337372s/7872h FT) 02/07/06
  156. #counts FB_NOT_SEX 110s/0h of 107818 corpus (99658s/8160h FVGT) 03/11/06
  157. #counts FB_NOT_SEX 23s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  158. #counts FB_NOT_SEX 108s/2h of 140226 corpus (90162s/50064h DOC) 04/19/06
  159. #body FB_GIRLS_DOLLAR /girl\$/i
  160. #score FB_GIRLS_DOLLAR 1.992
  161. #counts FB_GIRLS_DOLLAR 0s/0h of 37297 corpus (31824s/5473h MY) 02/07/06
  162. #counts FB_GIRLS_DOLLAR 0s/0h of 6866 corpus (4638s/2228h AxB) 02/07/06
  163. #counts FB_GIRLS_DOLLAR 0s/0h of 11694 corpus (6132s/5562h CT) 02/07/06
  164. #counts FB_GIRLS_DOLLAR 36s/0h of 345244 corpus (337372s/7872h FT) 02/07/06
  165. #counts FB_GIRLS_DOLLAR 8s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  166. #counts FB_GIRLS_DOLLAR 2s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  167. # 1as$e$
  168. #body FB_DOLLAR_ASS2 /(?:\b|[0-9])(?!ass)a[s\$][s\$](?:\b|e)/i
  169. #score FB_DOLLAR_ASS2 0.361
  170. #counts FB_DOLLAR_ASS2 2s/1h of 9374 corpus (7151s/2223h AxB) 03/01/06
  171. #counts FB_DOLLAR_ASS2 6s/0h of 12244 corpus (6572s/5672h CT) 03/01/06
  172. #counts FB_DOLLAR_ASS2 0s/2h of 27495 corpus (21848s/5647h MY) 03/01/06
  173. #counts FB_DOLLAR_ASS2 13s/0h of 34977 corpus (27086s/7891h FT) 03/01/06
  174. #counts FB_DOLLAR_ASS2 10s/2h of 84470 corpus (67306s/17164h ML) 03/01/06
  175. #counts FB_DOLLAR_ASS2 10s/1h of 103116 corpus (63731s/39385h DOC) 03/01/06
  176. #counts FB_DOLLAR_ASS2 58s/0h of 107818 corpus (99658s/8160h FVGT) 03/11/06
  177. #counts FB_DOLLAR_ASS2 21s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  178. #counts FB_DOLLAR_ASS2 13s/1h of 140226 corpus (90162s/50064h DOC) 04/19/06
  179. body FB_HARD_ERECTION /hard(?:er)? (?:erection|penis)/i
  180. score FB_HARD_ERECTION 1.66
  181. #counts FB_HARD_ERECTION 2728s/0h of 211356 corpus (203977s/7379h FT) 11/23/05
  182. #counts FB_HARD_ERECTION 393s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  183. #counts FB_HARD_ERECTION 573s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  184. #body FB_JACKRABBIT /Jack Rabbit Vibrat[o0]r/i
  185. #score FB_JACKRABBIT 3.599
  186. #counts FB_JACKRABBIT 640s/0h of 211356 corpus (203977s/7379h FT) 11/23/05
  187. #counts FB_JACKRABBIT 0s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  188. #counts FB_JACKRABBIT 47s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  # Matches character-substituted spellings only: "e" may be "3", "i" may be "1" or
  # "!", and "s" may be "5". The negative lookahead rejects the plain spelling (in any
  # letter case, because of /i), so at least one substitution must be present.
  189. body FB_PENIS /\b(?!penis)p[3e]n[i1!][s5]\b/i
  190. score FB_PENIS 1.66
  191. #counts FB_PENIS 170s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  192. #counts FB_PENIS 386s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  193. body FB_FEMALE_EJACU /female ejaculation/i
  194. score FB_FEMALE_EJACU 1.66
  195. #counts FB_FEMALE_EJACU 4s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  196. #counts FB_FEMALE_EJACU 1s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  197. body FB_INNOCENT /innocent (?:boy|girl|child)/i
  198. score FB_INNOCENT 0.40
  199. #counts FB_INNOCENT 14s/1h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  200. #counts FB_INNOCENT 7s/1h of 140226 corpus (90162s/50064h DOC) 04/19/06
  201. #body LW_PORN_PHOTO /Tell our photographers what to do in their next photo session our video/
  202. #score LW_PORN_PHOTO 5
  203. #describe LW_PORN_PHOTO Standard 'hot chicks' line
  204. #counts LW_PORN_PHOTO 0s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  205. #counts LW_PORN_PHOTO 3s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  206. #body LW_PORN_ONLINE /high quality photo's online/
  207. #score LW_PORN_ONLINE 2
  208. #describe LW_PORN_ONLINE Standard 'hot chicks' line
  209. #counts LW_PORN_ONLINE 0s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  210. #counts LW_PORN_ONLINE 4s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  211. #body LW_PORN_MODELS /models getting nasty/
  212. #score LW_PORN_MODELS 5
  213. #describe LW_PORN_MODELS Standard 'hot chicks' line
  214. #counts LW_PORN_MODELS 0s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  215. #counts LW_PORN_MODELS 0s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  216. body LW_PORN_HELLO /(?:Hey baby|Hello, stranger!) :\)/
  217. score LW_PORN_HELLO 1.66
  218. describe LW_PORN_HELLO Standard 'hot chicks' line
  219. #counts LW_PORN_HELLO 2s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  220. #counts LW_PORN_HELLO 5s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  221. #$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
  222. # set of porn keywords / when these words appear, it's more likely porn. SET A.
  # Set A sub-rules: one body test per keyword. The "__" prefix marks them as
  # sub-rules that are only used inside the meta expressions below.
  223. body __FVGT_BREASTS /\bbreasts?\b/i
  224. body __FVGT_FUCK /\bfuck/i
  225. body __FVGT_RAPE /\braped?\b/i
  226. body __FVGT_HORNY /\bhorny\b/i
  227. body __FVGT_VIRGIN /\bvirgins?\b/i
  228. body __FVGT_COCK /\bcock\b/i
  229. body __FVGT_LOLITA /\blolita\b/i
  230. body __FVGT_YOUNGGIRL /Young(?:est)? (?:girl|chick)/i
  231. body __FVGT_PUSSY /\bpuss(?:y|ies)/i
  232. body __FVGT_ASS /\sass\s/i
  233. body __FVGT_SLUT /\bslut\b/i
  234. # Metas that count how many Set A keywords matched.
  # FM_PORN_A_4 fires when more than two (i.e. three or more) sub-rules match;
  # FM_PORN_A_5 fires when more than three (i.e. four or more) match. Neither meta
  # excludes the other, so a message with four or more keywords hits both rules
  # and both scores are added.
  # NOTE(review): the _4/_5 suffixes do not equal the thresholds (3+ and 4+) --
  # confirm whether the names or the comparisons are the intended ones.
  235. meta FM_PORN_A_4 ((__FVGT_BREASTS + __FVGT_FUCK + __FVGT_RAPE + __FVGT_HORNY + __FVGT_VIRGIN + __FVGT_COCK + __FVGT_LOLITA + __FVGT_YOUNGGIRL + __FVGT_PUSSY + __FVGT_ASS + __FVGT_SLUT) > 2)
  236. meta FM_PORN_A_5 ((__FVGT_BREASTS + __FVGT_FUCK + __FVGT_RAPE + __FVGT_HORNY + __FVGT_VIRGIN + __FVGT_COCK + __FVGT_LOLITA + __FVGT_YOUNGGIRL + __FVGT_PUSSY + __FVGT_ASS + __FVGT_SLUT) > 3)
  237. score FM_PORN_A_4 1.09
  238. #counts FM_PORN_A_4 796s/2h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  239. #counts FM_PORN_A_4 243s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  240. score FM_PORN_A_5 0.98
  241. #counts FM_PORN_A_5 358s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  242. #counts FM_PORN_A_5 172s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  # Five individually weak body keywords, each tested as a whole word
  # (case-insensitive). __HAS_YOUNGGIRL allows an optional single whitespace
  # character between "young" and "girl(s)".
  243. body __HAS_COLLECTION /\bcollection\b/i
  244. body __HAS_HARDCORE /\bhardcore\b/i
  245. body __HAS_YOUNGGIRL /\byoung\s?girls?\b/i
  246. body __HAS_ADOLESCENT /\badolescents?\b/i
  247. body __HAS_CHICKS /\bchicks?\b/i
  # Fires only when more than two (i.e. at least three) of the five keywords above
  # appear in the same message body.
  248. meta FP_MIXED_PORN3 ((__HAS_COLLECTION + __HAS_HARDCORE + __HAS_YOUNGGIRL + __HAS_ADOLESCENT + __HAS_CHICKS) > 2)
  249. score FP_MIXED_PORN3 1.66
  250. #counts FP_MIXED_PORN3 4s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  251. #counts FP_MIXED_PORN3 5s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  # A verb from the first group followed, within 1 to 30 characters, by a noun from
  # the second group; "o" may be written "0" and "i" may be written "1".
  # The second group is non-capturing: nothing reads the capture, and the set of
  # matching bodies is unchanged.
  252. body SARE_ADULT1 /(?:suck|l[i1]ck).{1,30}(?:c[o0]ck|d[i1]ck)/i
  253. describe SARE_ADULT1 Contains adult material
  254. score SARE_ADULT1 1.47
  255. # Original name: FVGT_b_ADULT02
  256. # 55s/2h of 119325 corpus (98981s/20344h) 03/21/04
  257. # 18s/0h of 15929 corpus (13729s/2200h) 03/23/04
  258. #counts SARE_ADULT1 512s/1h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  259. #counts SARE_ADULT1 129s/1h of 140226 corpus (90162s/50064h DOC) 04/19/06
  # Body contains one of the listed adult words or phrases as a whole word
  # (case-insensitive). Inner groups are non-capturing and the single-character
  # alternation (l|1) is written as the class [l1]; nothing reads the captures, and
  # the set of matching bodies is unchanged.
  260. body SARE_ADULT2 /\b(?:sorority|rock hard|(?:adu?[l1]t|XXX) movies?|climatique|orgas(?:mic|ims?|ms?)|climax|ejactulate|penis|pussy|cunt|blowjob|intercourse|lubricate)\b/i
  261. describe SARE_ADULT2 Contains adult material
  262. score SARE_ADULT2 1.42
  263. # Original name: MY_XXX_BODY, was rawbody
  264. # 9985s/30h of 119325 corpus (98981s/20344h) 03/21/04
  265. # 683s/2h of 15929 corpus (13729s/2200h) 03/23/04
  266. #counts SARE_ADULT2 4729s/9h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  267. #counts SARE_ADULT2 2685s/34h of 140226 corpus (90162s/50064h DOC) 04/19/06
  268. body SARE_BETTERORG /(?:boost|magnify|multipl[ey]|increase|frequent|intense|intensify).{1,15}orgasm/i
  269. describe SARE_BETTERORG Talks about getting better orgasms
  270. score SARE_BETTERORG 1.66
  271. # Original name: YM_B_BETTER_ORG, RM_bpm_MultipleOrgasms
  272. # 592s/2h of 119325 corpus (98981s/20344h) 03/21/04
  273. # 29s/0h of 15929 corpus (13729s/2200h) 03/23/04
  274. #counts SARE_BETTERORG 249s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  275. #counts SARE_BETTERORG 111s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  276. body SARE_ENLRGYOUR /enlarge your/i
  277. describe SARE_ENLRGYOUR Talks about "enlarging" something
  278. score SARE_ENLRGYOUR 1.02
  279. # Original name: MY_EN_PENIS, was rawbody, RE_bpm_EnlargeYour
  280. # 1735s/0h of 119325 corpus (98981s/20344h) 03/21/04
  281. # 91s/0h of 15929 corpus (13729s/2200h) 03/23/04
  282. #counts SARE_ENLRGYOUR 537s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  283. #counts SARE_ENLRGYOUR 279s/1h of 140226 corpus (90162s/50064h DOC) 04/19/06
  284. body SARE_LRGPNS /(?:bigger|larger|increase your) (?:member\b|rod)/i
  285. describe SARE_LRGPNS Talks about a "bigger" appendage
  286. score SARE_LRGPNS 1.66
  287. # Original name: MY_MEMBER combined with MY_LRGROD
  288. # 50s/0h of 119325 corpus (98981s/20344h) 03/21/04
  289. # 0s/0h of 15929 corpus (13729s/2200h) 03/23/04
  290. #counts SARE_LRGPNS 0s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  291. #counts SARE_LRGPNS 0s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  292. body SARE_PNSSIZE /inch(?:es)? .{0,10}(?:cock|dick)/i
  293. describe SARE_PNSSIZE Talks about the size of male body part
  294. score SARE_PNSSIZE 1.66
  295. # Original name: YM_B_BODYPART_1
  296. # 3s/0h of 119325 corpus (98981s/20344h) 03/21/04
  297. # 6s/0h of 15929 corpus (13729s/2200h) 03/23/04
  298. #counts SARE_PNSSIZE 5s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  299. #counts SARE_PNSSIZE 2s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  300. body SARE_SXLIFE /(?:are you single|sex life|youre? partner)/i
  301. describe SARE_SXLIFE Talks about your sex life
  302. score SARE_SXLIFE 1.07
  303. # 695s/15h of 119325 corpus (98981s/20344h) 03/21/04
  304. # 212s/1h of 15929 corpus (13729s/2200h) 03/23/04
  305. #counts SARE_SXLIFE 991s/12h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  306. #counts SARE_SXLIFE 637s/54h of 140226 corpus (90162s/50064h DOC) 04/19/06
  307. body SARE_BEASTUD /be a stud/i
  308. describe SARE_BEASTUD common spammer phrasing
  309. score SARE_BEASTUD 0.26
  310. # Original name: RM_bpm_BeAStud
  311. # 53s/0h of 119325 corpus (98981s/20344h) 03/21/04
  312. # 7s/0h of 15929 corpus (13729s/2200h) 03/23/04
  313. #counts SARE_BEASTUD 73s/2h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  314. #counts SARE_BEASTUD 20s/1h of 140226 corpus (90162s/50064h DOC) 04/19/06
  315. body SARE_BIGRMEMBER /B.?i.?g.?g.?e.?r.{0,5}M.?e.?m.?b.?e.?r/i
  316. describe SARE_BIGRMEMBER mentions bigger body part
  317. score SARE_BIGRMEMBER 1.66
  318. # Original name: RM_bpm_BiggerMember
  319. # 17s/0h of 119325 corpus (98981s/20344h) 03/21/04
  320. # 0s/0h of 15929 corpus (13729s/2200h) 03/23/04
  321. #counts SARE_BIGRMEMBER 0s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  322. #counts SARE_BIGRMEMBER 0s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  323. body SARE_INLENGTH /increase.? my length/i
  324. describe SARE_INLENGTH common spammer phrasing
  325. score SARE_INLENGTH 1.66
  326. # Original name: RM_bpm_IncreaseLength
  327. # 40s/0h of 119325 corpus (98981s/20344h) 03/21/04
  328. # 8s/0h of 15929 corpus (13729s/2200h) 03/23/04
  329. #counts SARE_INLENGTH 60s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  330. #counts SARE_INLENGTH 20s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  331. #body SARE_LADYINLIFE /lady in your life/i
  332. #describe SARE_LADYINLIFE Contains phrasing used by spammers
  333. #score SARE_LADYINLIFE 0.166
  334. # Original name: RM_bpm_LadyInLife
  335. # 3s/0h of 119325 corpus (98981s/20344h) 03/21/04
  336. # 0s/0h of 15929 corpus (13729s/2200h) 03/23/04
  337. #counts SARE_LADYINLIFE 0s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  338. #counts SARE_LADYINLIFE 0s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  339. #body SARE_MAGICLUBE /"Magic Lubricant"/i
  340. #describe SARE_MAGICLUBE Spammer phrasing in body of email
  341. #score SARE_MAGICLUBE 2.222 # type=spamgg
  342. # Original name: RM_bpm_MagicLubricant
  343. # 704s/0h of 119325 corpus (98981s/20344h) 03/21/04
  344. # 12s/0h of 15929 corpus (13729s/2200h) 03/23/04
  345. #counts SARE_MAGICLUBE 0s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  346. #counts SARE_MAGICLUBE 0s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  347. body SARE_NOEMBARRASS /no embarrassing/i
  348. describe SARE_NOEMBARRASS Wow, I won't be embarrassed anymore!
  349. score SARE_NOEMBARRASS 1.66
  350. # Original name: RM_bpm_NoEmbarrassing
  351. # 30s/0h of 119325 corpus (98981s/20344h) 03/21/04
  352. # 6s/0h of 15929 corpus (13729s/2200h) 03/23/04
  353. #counts SARE_NOEMBARRASS 0s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  354. #counts SARE_NOEMBARRASS 1s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  355. body SARE_PLEASEPARTNR /Pleasure.{1,10}partner/i
  356. describe SARE_PLEASEPARTNR common spammer phrasing
  357. score SARE_PLEASEPARTNR 1.66
  358. # Original name: RM_bpm_PleasurePartnr
  359. # 51s/0h of 119325 corpus (98981s/20344h) 03/21/04
  360. # 6s/0h of 15929 corpus (13729s/2200h) 03/23/04
  361. #counts SARE_PLEASEPARTNR 60s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  362. #counts SARE_PLEASEPARTNR 20s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  363. #body SARE_POWERBOTTLE /"Power Bottle"/i
  364. #describe SARE_POWERBOTTLE Spammer phrasing in body of email
  365. # score SARE_POWERBOTTLE 2.222 # type=spamgg
  366. # Original name: RM_bpm_PowerBottle
  367. # 708s/0h of 119325 corpus (98981s/20344h) 03/21/04
  368. # 12s/0h of 15929 corpus (13729s/2200h) 03/23/04
  369. #counts SARE_POWERBOTTLE 0s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  370. #counts SARE_POWERBOTTLE 0s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  371. #body SARE_PRODEREC /produce erections/i
  372. #describe SARE_PRODEREC Contains medical spam phrasing
  373. #score SARE_PRODEREC 0.055
  374. # Original name: RE_bpm_ProdErec
  375. # 1s/0h of 119325 corpus (98981s/20344h) 03/21/04
  376. # 0s/0h of 15929 corpus (13729s/2200h) 03/23/04
  377. #counts SARE_PRODEREC 0s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  378. #counts SARE_PRODEREC 0s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  379. body SARE_SUPERVIAGRA /(?:super|weekend)[- ]viagra/i
  380. describe SARE_SUPERVIAGRA mentions drug which is often subject of spam
  381. score SARE_SUPERVIAGRA 1.66 # type=spamgg
  382. # Original name: RM_bpm_SuperViagra, RM_bpm_WeekendViagra
  383. # 299s/0h of 119325 corpus (98981s/20344h) 03/21/04
  384. # 11s/0h of 15929 corpus (13729s/2200h) 03/23/04
  385. #counts SARE_SUPERVIAGRA 136s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  386. #counts SARE_SUPERVIAGRA 704s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  387. body SARE_ADLTDATING /adult dating/i
  388. describe SARE_ADLTDATING Contains phrasing used by spammers
  389. score SARE_ADLTDATING 0.32
  390. # Original name: RM_bpp_Adultdating
  391. # 3s/0h of 119325 corpus (98981s/20344h) 03/21/04
  392. # 0s/0h of 15929 corpus (13729s/2200h) 03/23/04
  393. #counts SARE_ADLTDATING 1s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  394. #counts SARE_ADLTDATING 32s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  395. body SARE_ADLTPRSNLS /adult personals/i
  396. describe SARE_ADLTPRSNLS Contains phrasing used by spammers
  397. score SARE_ADLTPRSNLS 1.66
  398. # Original name: RM_bpp_AdultPersonals
  399. # 3s/0h of 119325 corpus (98981s/20344h) 03/21/04
  400. # 2s/0h of 15929 corpus (13729s/2200h) 03/23/04
  401. #counts SARE_ADLTPRSNLS 1s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  402. #counts SARE_ADLTPRSNLS 13s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  403. #body SARE_AREUBORED /Are you bored of/i
  404. #describe SARE_AREUBORED Contains phrasing used by spammers
  405. #score SARE_AREUBORED 0.111
  406. # Original name: RM_bpp_AreYouBored
  407. # 2s/0h of 119325 corpus (98981s/20344h) 03/21/04
  408. # 3s/0h of 15929 corpus (13729s/2200h) 03/23/04
  409. #counts SARE_AREUBORED 0s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  410. #counts SARE_AREUBORED 0s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  411. body SARE_CHILDPRN1 /child porn/i
  412. describe SARE_CHILDPRN1 contains reference to child porn
  413. score SARE_CHILDPRN1 1.15 # ham: news, FBI auto-responder
  414. # Original name: ChildPorn
  415. # 64s/3h of 119325 corpus (98981s/20344h) 03/21/04
  416. # 5s/0h of 15929 corpus (13729s/2200h) 03/23/04
  417. #counts SARE_CHILDPRN1 0s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  418. #counts SARE_CHILDPRN1 1s/1h of 140226 corpus (90162s/50064h DOC) 04/19/06
  419. #body SARE_CHILDPRN2 /child pornography webmaster/i
  420. #describe SARE_CHILDPRN2 contains reference to a child porn webmaster
  421. #score SARE_CHILDPRN2 2.222 # type=spamg
  422. # Original name: RM_bpp_ChildPorn2
  423. # 9s/0h of 119325 corpus (98981s/20344h) 03/21/04
  424. # 0s/0h of 15929 corpus (13729s/2200h) 03/23/04
  425. #counts SARE_CHILDPRN2 0s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  426. #counts SARE_CHILDPRN2 0s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  427. #body SARE_CHILDPRN3 /underage porn/i
  428. #describe SARE_CHILDPRN3 contains reference to child porn
  429. #score SARE_CHILDPRN3 2.222 # type=spamg
  430. # Original name: RM_bpp_ChildPorn3
  431. # 28s/0h of 119325 corpus (98981s/20344h) 03/21/04
  432. # 5s/0h of 15929 corpus (13729s/2200h) 03/23/04
  433. #counts SARE_CHILDPRN3 0s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  434. #counts SARE_CHILDPRN3 0s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  # Body rule: case-insensitive match on the literal phrase "decided to write".
  # No word-boundary anchors are used; the words must be separated by exactly one space.
  435. body SARE_TOWRITE /decided to write/i
  436. describe SARE_TOWRITE Contains phrasing used by spammers
  437. score SARE_TOWRITE 1.05
  438. # Original name: RM_bpp_DecidedToWrite
  439. # 41s/2h of 119325 corpus (98981s/20344h) 03/21/04
  440. # 2s/0h of 15929 corpus (13729s/2200h) 03/23/04
  441. #counts SARE_TOWRITE 6s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  442. #counts SARE_TOWRITE 11s/3h of 140226 corpus (90162s/50064h DOC) 04/19/06
  443. #body SARE_DRMWOMAN /your dream woman/i
  444. #describe SARE_DRMWOMAN Contains phrasing used by spammers
  445. #score SARE_DRMWOMAN 0.055
  446. # Original name: RM_bpp_DreamWoman
  447. # 1s/0h of 119325 corpus (98981s/20344h) 03/21/04
  448. # 0s/0h of 15929 corpus (13729s/2200h) 03/23/04
  449. #counts SARE_DRMWOMAN 0s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  450. #counts SARE_DRMWOMAN 0s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  # Body rule: case-insensitive match on the two-word phrase in the pattern.
  # There is no trailing anchor, so the phrase acts as a prefix: suffixed forms of the second word also match.
  451. body SARE_GETFCK /get fuck/i
  452. describe SARE_GETFCK Contains phrasing used by spammers
  453. score SARE_GETFCK 1.66 # type=spamp
  454. # Original name: RM_bpp_GetFucked
  455. # 22s/0h of 119325 corpus (98981s/20344h) 03/21/04
  456. # 8s/0h of 15929 corpus (13729s/2200h) 03/23/04
  457. #counts SARE_GETFCK 71s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  458. #counts SARE_GETFCK 32s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  459. #body SARE_GIRLSDOANY /girls will do anything/i
  460. #describe SARE_GIRLSDOANY Contains phrasing used by spammers
  461. #score SARE_GIRLSDOANY 0.166
  462. # Original name: RM_bpp_GirlsDoAny
  463. # 3s/0h of 119325 corpus (98981s/20344h) 03/21/04
  464. # 3s/0h of 15929 corpus (13729s/2200h) 03/23/04
  465. #counts SARE_GIRLSDOANY 1s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  466. #counts SARE_GIRLSDOANY 0s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  467. #body SARE_HORNY2 /horny as hell/i
  468. #describe SARE_HORNY2 Contains phrasing used by spammers
  469. #score SARE_HORNY2 0.222
  470. # Original name: RM_bpp_HornyAsHell
  471. # 4s/0h of 119325 corpus (98981s/20344h) 03/21/04
  472. # 3s/0h of 15929 corpus (13729s/2200h) 03/23/04
  473. #counts SARE_HORNY2 0s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  474. #counts SARE_HORNY2 0s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  475. #body SARE_MOMBLOW /mother blows/i
  476. #describe SARE_MOMBLOW textual phrase implies porn spam
  477. #score SARE_MOMBLOW 0.111
  478. # Original name: RM_bpp_MotherBlows
  479. # 2s/0h of 119325 corpus (98981s/20344h) 03/21/04
  480. # 0s/0h of 15929 corpus (13729s/2200h) 03/23/04
  481. #counts SARE_MOMBLOW 0s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  482. #counts SARE_MOMBLOW 0s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  # Body rule: case-insensitive match on one of three adjectives (amateur, horny, asian)
  # followed by exactly one space and the word "girls". The group is non-capturing (?:...).
  483. body SARE_BADGIRLS /(?:amateur|horny|asian) girls/i
  484. describe SARE_BADGIRLS Contains phrasing used by spammers
  485. score SARE_BADGIRLS 0.52
  486. # Original name: RM_bpp_PornGirls
  487. # 12s/0h of 119325 corpus (98981s/20344h) 03/21/04
  488. # 9s/0h of 15929 corpus (13729s/2200h) 03/23/04
  489. #counts SARE_BADGIRLS 21s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  490. #counts SARE_BADGIRLS 5s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  # Body rule: case-insensitive match on the literal phrase "quality singles" (unanchored).
  491. body SARE_QLTYSINGLES /quality singles/i
  492. describe SARE_QLTYSINGLES Contains phrasing seen in spam
  493. score SARE_QLTYSINGLES 1.66
  494. # Original name: RM_bpp_QualitySingles
  495. # 3s/0h of 119325 corpus (98981s/20344h) 03/21/04
  496. # 0s/0h of 15929 corpus (13729s/2200h) 03/23/04
  497. #counts SARE_QLTYSINGLES 0s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  498. #counts SARE_QLTYSINGLES 1s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  499. #body SARE_HORNY1 /so hoo+rny/i
  500. #describe SARE_HORNY1 Contains phrasing used by spammers
  501. #score SARE_HORNY1 1.000 # type=spamp
  502. # Original name: SoHorny
  503. # 1s/0h of 119325 corpus (98981s/20344h) 03/21/04
  504. # 0s/0h of 15929 corpus (13729s/2200h) 03/23/04
  505. #counts SARE_HORNY1 0s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  506. #counts SARE_HORNY1 0s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  507. #body SARE_SONSDICK /son's dick/i
  508. #describe SARE_SONSDICK textual phrase implies porn spam
  509. #score SARE_SONSDICK 1.000 # type=spamp
  510. # Original name: SonsDick
  511. # 2s/0h of 119325 corpus (98981s/20344h) 03/21/04
  512. # 0s/0h of 15929 corpus (13729s/2200h) 03/23/04
  513. #counts SARE_SONSDICK 0s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  514. #counts SARE_SONSDICK 0s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  # Body rule: case-insensitive match on the literal phrase "still single".
  # Unanchored at the end, so "still singles" is matched as well.
  515. body SARE_STILLSINGLE /still single/i
  516. describe SARE_STILLSINGLE Contains phrasing used by spammers
  517. score SARE_STILLSINGLE 1.66
  518. # Original name: RM_bpp_StillSingle
  519. # 11s/0h of 119325 corpus (98981s/20344h) 03/21/04
  520. # 5s/0h of 15929 corpus (13729s/2200h) 03/23/04
  521. #counts SARE_STILLSINGLE 0s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  522. #counts SARE_STILLSINGLE 71s/1h of 140226 corpus (90162s/50064h DOC) 04/19/06
  523. #body SARE_UNDRESSMTHR /undressed mother/i
  524. #describe SARE_UNDRESSMTHR textual phrase implies porn spam
  525. #score SARE_UNDRESSMTHR 0.200
  526. # Original name: RM_bpp_UndressedMother
  527. # 2s/0h of 119325 corpus (98981s/20344h) 03/21/04
  528. # 0s/0h of 15929 corpus (13729s/2200h) 03/23/04
  529. #counts SARE_UNDRESSMTHR 0s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  530. #counts SARE_UNDRESSMTHR 0s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  # Body rule: case-insensitive match on the single word "housewives".
  # No \b anchors, so the word is also matched when it is part of a longer token.
  531. body SARE_HOUSEWIVES /housewives/i
  532. describe SARE_HOUSEWIVES Mentions housewives, as in porn or in-home biz
  533. score SARE_HOUSEWIVES 0.99
  534. # Original name: RM_bwp_housewives
  535. # 138s/0h of 119325 corpus (98981s/20344h) 03/21/04
  536. # 18s/0h of 15929 corpus (13729s/2200h) 03/23/04
  537. #counts SARE_HOUSEWIVES 13s/3h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  538. #counts SARE_HOUSEWIVES 37s/6h of 140226 corpus (90162s/50064h DOC) 04/19/06
  # Body rule: case-insensitive match on "schoolgirls".
  # Only the plural is in the pattern; the singular "schoolgirl" does not match.
  539. body SARE_SCHLGRL /schoolgirls/i
  540. describe SARE_SCHLGRL mentions schoolgirls, as in porn
  541. score SARE_SCHLGRL 1.29
  542. # Original name: RM_bwp_schoolgirls
  543. # 11s/0h of 119325 corpus (98981s/20344h) 03/21/04
  544. # 6s/0h of 15929 corpus (13729s/2200h) 03/23/04
  545. #counts SARE_SCHLGRL 15s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  546. #counts SARE_SCHLGRL 19s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  547. ###############################
  548. # OBFU body rules #
  549. ###############################
  # Body rule: one alternation of words spelled with digit/symbol substitutions
  # (0 for o, 1 or ! for i/l, @ for a, 5 for s), e.g. adu?1t, p[0\@]rn, v[i1]de0.
  # \b anchors only the start of the alternation; there is no trailing boundary, so a
  # match may continue into a longer token. Matching is case-insensitive (/i).
  550. body SARE_ADLTOBFU /\b(?:adu?1t|amb[1!]en|b0y|bl0w|c0cks?|c0re|d0main|f?r0m|g(?:[1!]r[1l]|ir[!1])|[1!]ntercourse|l1ttle|l0se|mai1|manh00d|m0vie|[0o]rg\@sm|p[0\@]rn|p1ct|pen[1!]s|(?:ph|f)(?:[0\@]t|ot[0\@])|pu[s5]{1,2}[1!]e[s5]|secks|sm00th|t1ny|t1ts|v(?:irg1|1rgi|1rg1)n|v[i1]de0|y0ung|y0ur)/i
  551. describe SARE_ADLTOBFU Contains OBFU adult material
  552. score SARE_ADLTOBFU 0.68
  553. # Combined from FVGT_b_N0N0_WORDS, OACYS_DISGUISED_P0RN, M_K_N0N0_WORDS_BODY
  554. # 768s/1h of 119325 corpus (98981s/20344h) 03/21/04
  555. # 89s/0h of 15929 corpus (13729s/2200h) 03/23/04
  556. #counts SARE_ADLTOBFU 930s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  557. #counts SARE_ADLTOBFU 663s/10h of 140226 corpus (90162s/50064h DOC) 04/19/06
  # Body rule: matches the letters e-n-l-a-r-g-e starting at a word boundary, with at most
  # one arbitrary character (.?) allowed between each adjacent pair of letters.
  # The (?!enlarge) negative lookahead rejects the plain, unbroken spelling at that position,
  # so only spellings with at least one inserted character hit. Case-insensitive (/i).
  558. body SARE_OBFUENLARGE /\b(?!enlarge)e.?n.?l.?a.?r.?g.?e/i
  559. describe SARE_OBFUENLARGE masked spam word(s)
  560. score SARE_OBFUENLARGE 1.66 # type=obfu
  561. # Original name: RM_bwo_Enlarge
  562. # 478s/0h of 119325 corpus (98981s/20344h) 03/21/04
  563. # 18s/0h of 15929 corpus (13729s/2200h) 03/23/04
  564. #counts SARE_OBFUENLARGE 15s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  565. #counts SARE_OBFUENLARGE 466s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  566. #body SARE_OBFUFCK1 /(?!\bfuck)(?:\bf|\B(?:\xC5\xBF|\xC6\x92|\xD2[\x92-\x93]))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[uv\xB5\xD9-\xDC\xF9-\xFC]|\xC5[\xA8-\xB3]|\xC6[\xAF-\xB0]|\xC7[\x93-\x9C]|\xCE\xB0|\xCE\xBC|\xCF\x8B|\xCF\x8D|\xD4\xB1|\xD5\x84|\xD5\x8D)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[c\xC7\xE7\xA2\xA9]|\xC4[\x86-\x8D]|\xD0\xA1|\xD1\x81)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:k|\xC4[\xB6-\xB8]|\xCE\x9A|\xCE\xBA|\xD0\x8C|\xD0\x9A|\xD0\xBA|\xD1\x9C|\xD2[\x9A-\x9D]])/i
  567. #describe SARE_OBFUFCK1 Apparent spam seems to contain porn subject
  568. #score SARE_OBFUFCK1 1.666 # type=obfu
  569. # Original name: RM_bwo_Fucko1
  570. # 42s/0h of 119325 corpus (98981s/20344h) 03/21/04
  571. # 23s/0h of 15929 corpus (13729s/2200h) 03/23/04
  572. #counts SARE_OBFUFCK1 19s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  573. #counts SARE_OBFUFCK1 35s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  # Body rule: matches the letters f-u-c-k starting at a word boundary, with at most one
  # arbitrary character (.?) allowed between adjacent letters.
  # The leading negative lookahead (?!\bfun?ck) rejects positions where the text reads the
  # plain four-letter spelling or the "funck" variant (optional n). Case-insensitive (/i).
  574. body SARE_OBFUFCK2 /(?!\bfun?ck)\bf.?u.?c.?k/i
  575. describe SARE_OBFUFCK2 Apparent spam seems to contain porn subject
  576. score SARE_OBFUFCK2 1.00 # type=obfu
  577. # Original name: RM_bwo_Fucko2
  578. # 70s/1h of 119325 corpus (98981s/20344h) 03/21/04
  579. # 29s/0h of 15929 corpus (13729s/2200h) 03/23/04
  580. #counts SARE_OBFUFCK2 56s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  581. #counts SARE_OBFUFCK2 73s/3h of 140226 corpus (90162s/50064h DOC) 04/19/06
  582. #body SARE_OBFUGNGBNG /(?!\bgangbang(ed)?\b)(?:\b[g6]|\B(?:\xC4[\x9C-\xA3]))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[a4\*\@\xC0-\xC5\xAA\xE0-\xE5]|\/\\|\xC4[\x80-\x85]|\xC7[\x8D-\x8E]|\xC7[\xBA-\xBB]|\xCE\x86|\xCE\x91|\xCE\x94|\xCE\x9B|\xCE\xAC|\xCE\xB1|\xD0\x90|\xD0\xB0)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[g6]|\xC4[\x9C-\xA3]])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[b8\xDF]|\xCE\x92|\xCE\xB2|\xD0\x92|\xD0\xB2)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[a4\*\@\xC0-\xC5\xAA\xE0-\xE5]|\/\\|\xC4[\x80-\x85]|\xC7[\x8D-\x8E]|\xC7[\xBA-\xBB]|\xCE\x86|\xCE\x91|\xCE\x94|\xCE\x9B|\xCE\xAC|\xCE\xB1|\xD0\x90|\xD0\xB0)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[g6]|\xC4[\x9C-\xA3]])((?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[d\xD0]|\xC4[\x8E-\x91]))?\b/i
  583. #describe SARE_OBFUGNGBNG masked spam word(s)
  584. #score SARE_OBFUGNGBNG 2.5 # type=obfu
  585. # Original name: RM_bwo_Gangbang
  586. # 2s/0h of 15929 corpus (13729s/2200h) 03/23/04
  587. # 3s/0h of 119325 corpus (98981s/20344h) 03/21/04
  588. #counts SARE_OBFUGNGBNG 11s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  589. #counts SARE_OBFUGNGBNG 1s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  590. #body SARE_OBFUGIRLS /(?!\bgirls?\b)(?:\b[g6]|\B(?:\xC4[\x9C-\xA3]))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[il1:\|\*\xCC-\xCF\xEC-\xEF\xA6]|\xC4[\xA8-\xB0]|\xC4\xBA|\xC4\xBC|\xC4\xBE|\xC5\x80|\xC5\x82|\xC7[\x8F-\x90]|\xD0[\x86-\x87]|\xD1[\x96-\x97]|\xCE\x8A|\xCE\x90|\xCE\x99|\xCE\xAA|\xCE\xAF|\xCE\xB9|\xCF\x8A)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[r\xAE]|\xC5[\x94-\x99]|\xD1\x93)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[l1I\|\xA3]|(?:\xC5[\x80-\x82]|\xC4[\xB9-\xBF]))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[s5\$\xA7]|\xC5[\x9A-\xA1]|\xD0\x85|\xD1\x95|\xD5\x8F)?\b/i
  591. #describe SARE_OBFUGIRLS masked spam word(s)
  592. #score SARE_OBFUGIRLS 3.222 # type=obfu # ham: jpg
  593. # Original name: RM_bwo_Girls
  594. # 25s/1h of 15929 corpus (13729s/2200h) 03/23/04
  595. # 318s/1h of 119325 corpus (98981s/20344h) 03/21/04
  596. #counts SARE_OBFUGIRLS 112s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  597. #counts SARE_OBFUGIRLS 13s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  598. #body SARE_OBFUPENIS /(?!\bpen ?is\b)(?:\bp|\B(?:[\xDE]|\xCE\xA1|\xCF\x81|\xD0\xA0|\xD1\x80))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[il1:\|\*\xCC-\xCF\xEC-\xEF\xA6]|\xC4[\xA8-\xB0]|\xC4\xBA|\xC4\xBC|\xC4\xBE|\xC5\x80|\xC5\x82|\xC7[\x8F-\x90]|\xD0[\x86-\x87]|\xD1[\x96-\x97]|\xCE\x8A|\xCE\x90|\xCE\x99|\xCE\xAA|\xCE\xAF|\xCE\xB9|\xCF\x8A)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[s5]\b|(?:[\$\xA7]|\xC5[\x9A-\xA1]|\xD0\x85|\xD1\x95|\xD5\x8F)\B)/i
  599. #describe SARE_OBFUPENIS masked spam word(s)
  600. #score SARE_OBFUPENIS 2.333 # type=obfu
  601. # Original name: RM_bwo_Penis
  602. # 1027s/0h of 119325 corpus (98981s/20344h) 03/21/04
  603. # 91s/1h of 15929 corpus (13729s/2200h) 03/23/04
  604. #counts SARE_OBFUPENIS 516s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  605. #counts SARE_OBFUPENIS 578s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  606. #body SARE_OBFUPORNO /(?!\bporno?\b)(?:\bp|\B(?:[\xDE]|\xCE\xA1|\xCF\x81|\xD0\xA0|\xD1\x80))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[r\xAE]|\xC5[\x94-\x99]|\xD1\x93)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)?\b/i
  607. #describe SARE_OBFUPORNO masked spam word(s)
  608. # score SARE_OBFUPORNO 2.500 # type=obfu
  609. # Original name: RM_bwo_Porno
  610. # 266s/0h of 119325 corpus (98981s/20344h) 03/21/04
  611. # 36s/0h of 15929 corpus (13729s/2200h) 03/23/04
  612. #counts SARE_OBFUPORNO 43s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  613. #counts SARE_OBFUPORNO 22s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  614. #body SARE_OBFUPUSS /(?!\bpussies\b)(?:\bp|\B(?:[\xDE]|\xCE\xA1|\xCF\x81|\xD0\xA0|\xD1\x80))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[uv\*\xB5\xD9-\xDC\xF9-\xFC]|\xC5[\xA8-\xB3]|\xC6[\xAF-\xB0]|\xC7[\x93-\x9C]|\xCE\xB0|\xCE\xBC|\xCF\x8B|\xCF\x8D|\xD4\xB1|\xD5\x84|\xD5\x8D)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[s5\$\xA7]|\xC5[\x9A-\xA1]|\xD0\x85|\xD1\x95|\xD5\x8F)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[s5\$\xA7]|\xC5[\x9A-\xA1]|\xD0\x85|\xD1\x95|\xD5\x8F)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[il1:\|\*\xCC-\xCF\xEC-\xEF\xA6]|\xC4[\xA8-\xB0]|\xC4\xBA|\xC4\xBC|\xC4\xBE|\xC5\x80|\xC5\x82|\xC7[\x8F-\x90]|\xD0[\x86-\x87]|\xD1[\x96-\x97]|\xCE\x8A|\xCE\x90|\xCE\x99|\xCE\xAA|\xCE\xAF|\xCE\xB9|\xCF\x8A)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[s5]\b|(?:[\$\xA7]|\xC5[\x9A-\xA1]|\xD0\x85|\xD1\x95|\xD5\x8F)\B)/i
  615. #describe SARE_OBFUPUSS masked spam word(s)
  616. #score SARE_OBFUPUSS 2.500 # type=obfu
  617. # Original name: RM_bwo_Pussies
  618. # 28s/0h of 119325 corpus (98981s/20344h) 03/21/04
  619. # 27s/0h of 15929 corpus (13729s/2200h) 03/23/04
  620. #counts SARE_OBFUPUSS 0s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  621. #counts SARE_OBFUPUSS 0s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  # Body rule: matches the letters s-e-x-u-a-l starting at a word boundary, with at most one
  # arbitrary character (.?) allowed between adjacent letters.
  # (?!Sexual) rejects the plain, unbroken spelling; because of /i the capital S in the
  # pattern does not restrict case.
  622. body SARE_OBFUSEXUAL /\b(?!Sexual)S.?e.?x.?u.?a.?l/i
  623. describe SARE_OBFUSEXUAL masked spam word(s)
  624. score SARE_OBFUSEXUAL 1.66 # type=obfu
  625. # Original name:
  626. # 409s/0h of 119325 corpus (98981s/20344h) 03/21/04
  627. # 27s/0h of 15929 corpus (13729s/2200h) 03/23/04
  628. #counts SARE_OBFUSEXUAL 676s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  629. #counts SARE_OBFUSEXUAL 373s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  630. #body SARE_OBFUTEENS /(?!\bteens?\b)(?:\bt|\B(?:[\+]|\xC5[\xA2-\xA7]|\xCE\xA4|\xCF\x84|\xD0\xA2|\xD1\x82))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[s5\$\xA7]|\xC5[\x9A-\xA1]|\xD0\x85|\xD1\x95|\xD5\x8F)?\b/i
  631. #describe SARE_OBFUTEENS masked spam word(s)
  632. #score SARE_OBFUTEENS 2.500 # type=obfu
  633. # Original name: RM_bwo_Teens
  634. # 28s/0h of 119325 corpus (98981s/20344h) 03/21/04
  635. # 4s/0h of 15929 corpus (13729s/2200h) 03/23/04
  636. #counts SARE_OBFUTEENS 1s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  637. #counts SARE_OBFUTEENS 1s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  # Body rule: matches the letters of "testosterone" starting at a word boundary, with at
  # most one arbitrary character (.?) allowed between adjacent letters.
  # (?!testosterone) rejects the plain, unbroken spelling at that position. Case-insensitive (/i).
  638. body SARE_OBFUTESTO /\b(?!testosterone)t.?e.?s.?t.?o.?s.?t.?e.?r.?o.?n.?e/i
  639. describe SARE_OBFUTESTO masked spam word(s)
  640. score SARE_OBFUTESTO 1.66 # type=obfu
  641. # Original name: RM_bwo_Testosterone
  642. # 10s/0h of 119325 corpus (98981s/20344h) 03/21/04
  643. # 0s/0h of 15929 corpus (13729s/2200h) 03/23/04
  644. #counts SARE_OBFUTESTO 0s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  645. #counts SARE_OBFUTESTO 0s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  646. #body SARE_OBFUVRGN /(?!\bvirgins?\b)(?:\b[vu]|\B(?:\\\/|\xCE\xBD))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[il1:\|\*\xCC-\xCF\xEC-\xEF\xA6]|\xC4[\xA8-\xB0]|\xC4\xBA|\xC4\xBC|\xC4\xBE|\xC5\x80|\xC5\x82|\xC7[\x8F-\x90]|\xD0[\x86-\x87]|\xD1[\x96-\x97]|\xCE\x8A|\xCE\x90|\xCE\x99|\xCE\xAA|\xCE\xAF|\xCE\xB9|\xCF\x8A)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[r\xAE]|\xC5[\x94-\x99]|\xD1\x93)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[g6]|\xC4[\x9C-\xA3]])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[il1:\|\*\xCC-\xCF\xEC-\xEF\xA6]|\xC4[\xA8-\xB0]|\xC4\xBA|\xC4\xBC|\xC4\xBE|\xC5\x80|\xC5\x82|\xC7[\x8F-\x90]|\xD0[\x86-\x87]|\xD1[\x96-\x97]|\xCE\x8A|\xCE\x90|\xCE\x99|\xCE\xAA|\xCE\xAF|\xCE\xB9|\xCF\x8A)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[s5\$\xA7]|\xC5[\x9A-\xA1]|\xD0\x85|\xD1\x95|\xD5\x8F)?\b/i
  647. #describe SARE_OBFUVRGN masked spam word(s)
  648. #score SARE_OBFUVRGN 2.500 # type=obfu
  649. # Original name: RM_bwo_Virgins
  650. # 25s/0h of 119325 corpus (98981s/20344h) 03/21/04
  651. # 16s/0h of 15929 corpus (13729s/2200h) 03/23/04
  652. #counts SARE_OBFUVRGN 0s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  653. #counts SARE_OBFUVRGN 0s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  654. #body SARE_SPRDLGS /spread(?:ing)? their leg/i
  655. #describe SARE_SPRDLGS Contains possible adult phrase
  656. #score SARE_SPRDLGS 0.222
  657. # 4s/0h of 125078 corpus (104890s/20188h) 03/29/04
  658. # 0s/0h of 15929 corpus (13729s/2200h) 03/29/04
  659. #counts SARE_SPRDLGS 0s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  660. #counts SARE_SPRDLGS 2s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  # Body rule: whole-word (\b...\b) match for three words in which every letter may be
  # repeated one or more times (a+s+s+, c+o+c+k+, p+u+s+s+y+).
  # The leading negative lookahead excludes the three normal spellings as whole words,
  # so only forms with at least one stretched letter hit. Case-insensitive (/i).
  661. body SARE_RPTLETTERS /(?!\b(?:ass|cock|pussy)\b)\b(?:a+s+s+|c+o+c+k+|p+u+s+s+y+)\b/i
  662. describe SARE_RPTLETTERS Contains mis-spelled adult phrase(s)
  663. score SARE_RPTLETTERS 1.66
  664. # 5s/0h of 125078 corpus (104890s/20188h) 03/29/04
  665. # 2s/0h of 15929 corpus (13729s/2200h) 03/29/04
  666. #counts SARE_RPTLETTERS 15s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  667. #counts SARE_RPTLETTERS 1s/1h of 140226 corpus (90162s/50064h DOC) 04/19/06
  # Body rule: matches "sex drive" or "sexual drive" (optional "ual"), anchored with \b at
  # the start only. /i makes the capitalised pattern case-insensitive.
  668. body SARE_SEXDRIVE /\bSex(?:ual)? Drive/i
  669. describe SARE_SEXDRIVE Talks about sex drive
  670. score SARE_SEXDRIVE 1.66
  671. # Original name: RM_bpm_SexDrive
  672. # 589s/0h of 125078 corpus (104890s/20188h) 03/29/04
  673. # 141s/0h of 15929 corpus (13729s/2200h) 03/29/04
  674. #counts SARE_SEXDRIVE 239s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  675. #counts SARE_SEXDRIVE 531s/5h of 140226 corpus (90162s/50064h DOC) 04/19/06
  # Body rule: case-insensitive match on the literal phrase "better sex" (unanchored).
  676. body SARE_BETTERSEX /better sex/i
  677. describe SARE_BETTERSEX Spammer phrasing in body of email
  678. score SARE_BETTERSEX 1.66
  679. # Original name: RM_bpm_BetterSex
  680. # 157s/0h of 125078 corpus (104890s/20188h) 03/29/04
  681. # 8s/0h of 15929 corpus (13729s/2200h) 03/29/04
  682. #counts SARE_BETTERSEX 292s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  683. #counts SARE_BETTERSEX 262s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  # Body rule: case-insensitive match on "sex enhancer" or "sexual enhancer" (optional "ual").
  # The pattern has no leading \b, so it also matches when "sex" is the tail of a longer word.
  684. body SARE_SEXENHANCER /sex(?:ual)? enhancer/i
  685. describe SARE_SEXENHANCER mentions spam topic
  686. score SARE_SEXENHANCER 1.66 # type=spamp
  687. # Original name: RM_bpm_SexEnhancer
  688. # 11s/0h of 125078 corpus (104890s/20188h) 03/29/04
  689. # 11s/0h of 15929 corpus (13729s/2200h) 03/29/04
  690. #counts SARE_SEXENHANCER 0s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  691. #counts SARE_SEXENHANCER 7s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  692. #body SARE_OBFUHARDCORE /(?!hard[ -]?core)(?:\bh|\B(?:\xC4[\xA4-\xA7]|\xCE\x89|\xCE\x97|\xD0\x9D|\xD0\xBD|\xD1\x92|\xD2[\xA2-\xA3]|\xD2[\xBA-\xBB]|\xD5\xB0))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[a4\*\@\xC0-\xC5\xAA\xE0-\xE5]|\/\\|\xC4[\x80-\x85]|\xC7[\x8D-\x8E]|\xC7[\xBA-\xBB]|\xCE\x86|\xCE\x91|\xCE\x94|\xCE\x9B|\xCE\xAC|\xCE\xB1|\xD0\x90|\xD0\xB0)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[r\xAE]|\xC5[\x94-\x99]|\xD1\x93)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[d\xD0]|\xC4[\x8E-\x91])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[c\*\xC7\xE7\xA2\xA9]|\xC4[\x86-\x8D]|\xD0\xA1|\xD1\x81)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[r\xAE]|\xC5[\x94-\x99]|\xD1\x93)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3]\b|(?:[\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)\B)/i
  693. #describe SARE_OBFUHARDCORE masked spam word(s)
  694. #score SARE_OBFUHARDCORE 1.433 # type=obfu
  695. # Original name: RM_bwo_hardcore
  696. # 32s/0h of 98440 corpus (76828s/21612h) 05/09/04
  697. #counts SARE_OBFUHARDCORE 4s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  698. #counts SARE_OBFUHARDCORE 3s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  699. ###############################
  700. # uri rules #
  701. ###############################
  702. #uri SARE_PNSPTCH /\bbolik34\b/i
  703. #describe SARE_PNSPTCH Terra.es penile patch spammer
  704. #score SARE_PNSPTCH 1.5 # was .33
  705. # Original name: MAKEPENIBIG
  706. # 277s/0h of 119325 corpus (98981s/20344h) 03/21/04
  707. # 0s/0h of 15929 corpus (13729s/2200h) 03/23/04
  708. #counts SARE_PNSPTCH 0s/0h of 42056 corpus (34127s/7929h FVGT) 04/19/06
  709. #counts SARE_PNSPTCH 0s/0h of 140226 corpus (90162s/50064h DOC) 04/19/06
  710. # EOF