badwords.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398
  1. #include "pch.h"
  2. #include "badwords.h"
  3. // note, this became much more complex than I have time for, so I'm bailing out
  4. // and using a cheeseball algorithm that is probably on the order of n^2 for the
  5. // text to be filtered. If it looks bad on perf, then I'll continue to develop
  6. // the correct approach
  7. /*
  8. //------------------------------------------------------------------------------
  9. // avoid polluting the global namespaces
  10. //------------------------------------------------------------------------------
  11. namespace FilterAutomata
  12. {
  13. //--------------------------------------------------------------------------
  14. // valid action codes
  15. //--------------------------------------------------------------------------
  16. enum ActionCode
  17. {
  18. ACTION_DO_NOTHING,
  19. ACTION_STORE_CURRENT_POINTER,
  20. ACTION_WIPE_CURRENT_RANGE
  21. };
  22. //--------------------------------------------------------------------------
  23. // a cell in a state
  24. //--------------------------------------------------------------------------
  25. struct Cell
  26. {
  27. ActionCode actionCode;
  28. short iNextState;
  29. Cell (void) : actionCode (ACTION_DO_NOTHING), iNextState (0) {}
  30. operator = (const Cell& cell)
  31. {
  32. actionCode = cell.actionCode;
  33. iNextState = cell.iNextState;
  34. }
  35. };
  36. //--------------------------------------------------------------------------
  37. // a state
  38. //--------------------------------------------------------------------------
  39. struct State
  40. {
  41. Cell cell[256];
  42. operator = (const State& state)
  43. {
  44. for (int i = 0; i < 256; i++)
  45. cell[i] = state.cell[i];
  46. }
  47. };
  48. //--------------------------------------------------------------------------
  49. // the actual finite state machine
  50. //--------------------------------------------------------------------------
  51. struct Machine
  52. {
  53. private:
  54. State* pState;
  55. unsigned int iStateCount;
  56. unsigned int iNextState;
  57. public:
  58. Machine (void) : iStateCount (16), iNextState (0), pState (new State[iStateCount]) {}
  59. State* GetState (unsigned int iState)
  60. {
  61. while (iNextState >= iStateCount)
  62. {
  63. // automatically double the size of the table each time we go over what we currently have
  64. int iNewStateCount = iStateCount * 2;
  65. State* pNewState = new State[iNewStateCount];
  66. for (int i = 0; i < iStateCount; i++)
  67. pNewState[i] = pState[i];
  68. iStateCount = iNewStateCount;
  69. delete[] pState;
  70. pState = pNewState;
  71. debugf ("Doubled FSM table size to %d\n", iStateCount);
  72. }
  73. return pState + iState;
  74. }
  75. unsigned int GetNextState (void)
  76. {
  77. return iNextState++;
  78. }
  79. };
  80. //--------------------------------------------------------------------------
  81. };
  82. using FilterAutomata;
  83. //------------------------------------------------------------------------------
  84. // global variable
  85. //------------------------------------------------------------------------------
  86. Machine* g_pFilterAutomata = 0;
  87. //------------------------------------------------------------------------------
  88. // function to add a word to the automata
  89. //------------------------------------------------------------------------------
  90. void AddWordToFilterAutomata (char* pDirtyWord)
  91. {
  92. // print out the word to confirm
  93. debugf ("Adding Dirty Word (%s)...", pDirtyWord);
  94. if (!g_pFilterAutomata)
  95. g_pFilterAutomata = new Machine;
  96. // first, run through the machine until we reach a point where we have raw data
  97. int iStartState = 0;
  98. State* pState = g_pFilterAutomata->GetState (iStartState);
  99. while (*pDirtyWord && pState->cell[*pDirtyWord].nextState != 0)
  100. {
  101. pState = g_pFilterAutomata->GetState (pState->cell[*pDirtyWord].nextState);
  102. pDirtyWord++;
  103. }
  104. if (*pDirtyWord)
  105. {
  106. // here we have exhausted the word, so we should fill
  107. }
  108. else
  109. {
  110. }
  111. debugf ("done\n");
  112. }
  113. */
  114. //------------------------------------------------------------------------------
  115. // whether or not to censor
  116. //------------------------------------------------------------------------------
  // Global censoring switch: starts enabled. Read by CensorBadWords and
  // CensorDisplay, flipped by ToggleCensorDisplay.
  bool g_bCensor = true;
  118. //------------------------------------------------------------------------------
  119. // bad word storage
  120. //------------------------------------------------------------------------------
  // Table of bad words. The array itself is heap allocated on first use and is
  // never released (one table per application, so the leak is accepted). The
  // entries are borrowed pointers: the table does not copy or own the strings.
  char** g_pBadWordList = 0;
  int g_iBadWordListSize = 0;      // number of slots currently in use
  int g_iBadWordListMaxSize = 0;   // number of slots allocated; 0 until the first AddWord

  //------------------------------------------------------------------------------
  // AddWord
  //   Appends pDirtyWord to the bad word table, allocating the table (8 slots)
  //   on the first call and doubling its capacity whenever it is full.
  //   pDirtyWord: pointer stored as-is; it must stay valid for as long as the
  //               table is used.
  //------------------------------------------------------------------------------
  void AddWord (char* pDirtyWord)
  {
      // first call: create the initial table
      if (g_iBadWordListMaxSize == 0)
      {
          g_iBadWordListMaxSize = 8;
          g_pBadWordList = new char*[g_iBadWordListMaxSize];
      }

      // make room for one more entry, doubling until there is a free slot
      while (g_iBadWordListSize >= g_iBadWordListMaxSize)
      {
          const int nOldCapacity = g_iBadWordListMaxSize;
          const int nNewCapacity = nOldCapacity * 2;
          char** ppGrown = new char*[nNewCapacity];

          // carry the existing entries over to the bigger table
          char** ppDst = ppGrown;
          for (char** ppSrc = g_pBadWordList; ppSrc != g_pBadWordList + nOldCapacity; ++ppSrc)
              *ppDst++ = *ppSrc;

          delete[] g_pBadWordList;
          g_pBadWordList = ppGrown;
          g_iBadWordListMaxSize = nNewCapacity;
      }

      // append the new word
      g_pBadWordList[g_iBadWordListSize] = pDirtyWord;
      ++g_iBadWordListSize;
  }
  149. //------------------------------------------------------------------------------
  150. // bad word table sort function
  151. //------------------------------------------------------------------------------
  // qsort comparison callback for an array of C-string pointers (char*).
  // Orders the strings by length, longest first: the result is positive when
  // the string behind b is longer than the one behind a, negative when it is
  // shorter, and zero when both have the same length.
  //
  // The lengths are compared explicitly instead of being subtracted: the
  // difference of two size_t values does not fit an int in general, and
  // narrowing it could yield the wrong sign.
  int sortfunc (const void* a, const void* b)
  {
      const size_t nLengthA = strlen (*(const char* const*) a);
      const size_t nLengthB = strlen (*(const char* const*) b);

      if (nLengthB > nLengthA)
          return 1;
      if (nLengthB < nLengthA)
          return -1;
      return 0;
  }
  156. //------------------------------------------------------------------------------
  157. // abbreviations for important constants
  158. //------------------------------------------------------------------------------
  159. #define ENDWORD 0x0d
  160. #define ENDWORDLIST 0x20
  161. //------------------------------------------------------------------------------
  162. // function to build the automata that will be used to do the filtering
  163. //------------------------------------------------------------------------------
  164. void BuildFilterAutomata (char* pBuffer)
  165. {
  166. // skip to the end of the zone header
  167. while (*pBuffer != ENDWORD)
  168. pBuffer++;
  169. // now skip to the beginning of the first dirty word
  170. pBuffer += 2;
  171. // now, for each word in the list, install it into the table
  172. while (*pBuffer != ENDWORDLIST)
  173. {
  174. // save the pointer to the beginning of the current word
  175. char* pDirtyWord = pBuffer;
  176. // advance the pointer to the end of the word
  177. while (*pBuffer != ENDWORD)
  178. pBuffer++;
  179. // set the null on the end of the word and advance to the beginning
  180. // of the next dirty word (skip the newline after the carriage return)
  181. *pBuffer++ = 0;
  182. pBuffer++;
  183. // add the word to the automata
  184. //AddWordToFilterAutomata (pDirtyWord);
  185. AddWord (pDirtyWord);
  186. }
  187. // now sort the list by length
  188. qsort (g_pBadWordList, g_iBadWordListSize, sizeof (char*), sortfunc);
  189. }
  190. //------------------------------------------------------------------------------
  191. // table to look up lower case conversions
  192. //------------------------------------------------------------------------------
  // 256-entry translation table, one output character per byte value, laid out
  // here as eight rows of 32 entries:
  //   - 'A'..'Z' map to 'a'..'z';
  //   - NUL, tab, newline, carriage return and the printable ASCII characters
  //     map to themselves;
  //   - every other byte value maps to '.'.
  // Index it with the byte value as an unsigned char; a negative plain char
  // would address memory in front of the table.
  char* gszConvertToLowerCase =
      "\0........" "\t\n..\r" "........" "........" ".."                 // 0x00 - 0x1f
      " !\"#$%&'()*+,-./0123456789:;<=>?"                               // 0x20 - 0x3f
      "@abcdefghijklmnopqrstuvwxyz[\\]^_"                               // 0x40 - 0x5f
      "`abcdefghijklmnopqrstuvwxyz{|}~."                                // 0x60 - 0x7f
      "........" "........" "........" "........"                       // 0x80 - 0x9f
      "........" "........" "........" "........"                       // 0xa0 - 0xbf
      "........" "........" "........" "........"                       // 0xc0 - 0xdf
      "........" "........" "........" "........";                      // 0xe0 - 0xff
  195. //------------------------------------------------------------------------------
  196. // resource id and other defines
  197. //------------------------------------------------------------------------------
  198. #define IDR_BADWORDS 1123
  199. #define BADWORDS_DLL_NAME "allbad"
  200. #define DLL_SUFFIX ".dll"
  201. //------------------------------------------------------------------------------
  202. // function to load the bad words
  203. //------------------------------------------------------------------------------
  204. void LoadBadWords (void)
  205. {
  206. // load the bad words resource dll from the artwork directory
  207. char szFilename[MAX_PATH + 1];
  208. HRESULT hr = UTL::getFile (BADWORDS_DLL_NAME, DLL_SUFFIX, szFilename, false, false);
  209. HMODULE hModule = 0;
  210. if (hr == S_OK)
  211. {
  212. debugf ("Attempting to load %s\n", szFilename);
  213. hModule = LoadLibrary (szFilename);
  214. }
  215. // load from anywhere
  216. if (!hModule)
  217. {
  218. debugf ("Attempting to load " BADWORDS_DLL_NAME DLL_SUFFIX "\n");
  219. hModule = LoadLibrary (BADWORDS_DLL_NAME DLL_SUFFIX);
  220. }
  221. // if the load worked, proceed normally
  222. if (hModule)
  223. {
  224. debugf ("Load of " BADWORDS_DLL_NAME " succeeded, bad word filtering will be enabled\n");
  225. // find the binary resource containing the scrambled data
  226. HRSRC hResource = FindResource (hModule, MAKEINTRESOURCE(IDR_BADWORDS), "BINARY");
  227. if (hResource)
  228. {
  229. // get the size of that resource so we can allocate a buffer for it
  230. int iSize = SizeofResource (hModule, hResource);
  231. if (iSize > 0)
  232. {
  233. // load the resource into memory
  234. HGLOBAL hResourceData = LoadResource (hModule, hResource);
  235. if (hResourceData)
  236. {
  237. // lock the resource memory so we can copy it
  238. void* pResourceData = LockResource (hResourceData);
  239. if (pResourceData)
  240. {
  241. // allocate the buffer for our own copy of the data
  242. char* pBuffer = new char[iSize];
  243. if (pBuffer)
  244. {
  245. // copy the data
  246. CopyMemory (pBuffer, pResourceData, iSize);
  247. // it's encrypted in a really dumb way, so decode it, and convert it all to lower case
  248. for (int i = 0; i < iSize; i++)
  249. {
  250. char iCharacter = pBuffer[i] ^ 0xcd;
  251. pBuffer[i] = gszConvertToLowerCase[iCharacter];
  252. }
  253. // build the filter automata
  254. BuildFilterAutomata (pBuffer);
  255. // release the buffer
  256. // actually, don't because the pointers are stored directly in the table now. Yes, this is a memory leak on a global scale.
  257. //delete[] pBuffer;
  258. }
  259. }
  260. }
  261. }
  262. }
  263. FreeLibrary (hModule);
  264. }
  265. else
  266. debugf ("FAILED TO LOAD " BADWORDS_DLL_NAME ", bad word filtering will be disabled\n");
  267. }
  268. //------------------------------------------------------------------------------
  269. // function to filter the bad words
  270. //------------------------------------------------------------------------------
  271. void FilterBadWords (char* szString)
  272. {
  273. // this algorithm absolutely stinks. There's all sorts of holes in it. It
  274. // just deserves to die horribly, but I have no time to do anything
  275. // better.
  276. // this is the set of characters we will use to obscure bad words
  277. char* random = "*&^%$#@!!@#$%^&*@$^*!#%&&%#!*^$@";
  278. // copy the string and make it totally lowercase
  279. char* szLowerCopy = new char[strlen (szString) + 1];
  280. strcpy (szLowerCopy, szString);
  281. char* tmp = szLowerCopy;
  282. while (*tmp)
  283. {
  284. *tmp = gszConvertToLowerCase[*tmp];
  285. tmp++;
  286. }
  287. // loop over all of the bad words and look for them in the string
  288. for (int i = 0; i < g_iBadWordListSize; i++)
  289. {
  290. // perform the search on the lower case copy
  291. char* szFound = strstr (szLowerCopy, g_pBadWordList[i]);
  292. while (szFound)
  293. {
  294. // we found a match, so figure the offset and copy the
  295. char* cpy = g_pBadWordList[i];
  296. tmp = szString + (szFound - szLowerCopy);
  297. while (*cpy)
  298. {
  299. // we have to make the changes to the original and the copy
  300. // so that we don't end up in an infinite loop
  301. char exclamation = random[(*cpy++) % 32];
  302. *tmp++ = exclamation;
  303. *szFound++ = exclamation;
  304. }
  305. // now search again, in case there are more instances of the word
  306. szFound = strstr (szLowerCopy, g_pBadWordList[i]);
  307. }
  308. }
  309. // delete the buffer
  310. delete[] szLowerCopy;
  311. }
  312. //------------------------------------------------------------------------------
  313. // censor a zstring, returning a new one
  314. //------------------------------------------------------------------------------
  315. ZString CensorBadWords (const ZString& string)
  316. {
  317. if (g_bCensor)
  318. {
  319. char* szBuffer = new char[string.GetLength () + 1];
  320. strcpy (szBuffer, string);
  321. FilterBadWords (szBuffer);
  322. ZString result (szBuffer);
  323. delete[] szBuffer;
  324. return result;
  325. }
  326. else
  327. return string;
  328. }
  329. //------------------------------------------------------------------------------
  330. // toggle the censor flag
  331. //------------------------------------------------------------------------------
  332. void ToggleCensorDisplay (void)
  333. {
  334. g_bCensor = ! g_bCensor;
  335. }
  336. //------------------------------------------------------------------------------
  337. // get the censor flag value
  338. //------------------------------------------------------------------------------
  339. bool CensorDisplay (void)
  340. {
  341. return g_bCensor;
  342. }
  343. //------------------------------------------------------------------------------