cheatDetector.cpp 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268
  1. /*Dylan Jeffers
  2. *Tahmid Rahman
  3. *founders of RahmROMJeffers ltd.
  4. *
  5. *cheatDetector implementation - compares documents to each
  6. *other and outputs how many phrases of a given length match
  7. */
  8. #include <iostream>
  9. #include <fstream>
  10. #include <stdlib.h>
  11. #include "pair.h"
  12. #include "linkedBST.h"
  13. #include "library/circularArrayList.h"
  14. #include "AVLTree.h"
  15. using namespace std;
  16. BST<string, int>* storeDoc(string fileName, bool useAVL, int phraseSize);
  17. List < Pair<string, BST<string, int>* > >*
  18. loadDocs(string flist, bool useAVL, int phraseSize);
  19. void cleanUp(List < Pair<string, BST<string, int>* > >* docList);
  20. void bestCompare(List < Pair<string, BST<string, int>* > >* docList);
  21. int compare(BST<string, int>* doc1, BST<string, int>* doc2);
  22. int main(int argc, char* argv[]){
  23. if(argc != 4){
  24. cerr << "Incorrect number of arguments" << endl;
  25. cerr << "Usage: cheatDetector file-list phrase-size useAVL" << endl;
  26. return 1;
  27. }
  28. int phraseSize = atoi(argv[2]);
  29. bool useAVL = atoi(argv[3]);
  30. string flist(argv[1]);
  31. if (phraseSize <= 0){
  32. cerr << "Incorrect number for phrase-size" << endl;
  33. cerr << "Phrase size must be greater than 0" << endl;
  34. return 1;
  35. }
  36. List < Pair<string, BST<string, int>* > >* docList;
  37. docList = loadDocs(flist, useAVL, phraseSize);
  38. bestCompare(docList);
  39. cleanUp(docList);
  40. delete docList;
  41. return 0;
  42. }
  43. /*storeDoc
  44. *inputs: name of file, useAVL, phraseSize
  45. *opens a single document and loads it into a binary tree
  46. *returns binary tree version of document
  47. */
  48. BST<string, int>* storeDoc(string fileName, int useAVL, int phraseSize) {
  49. BST<string, int>* document;
  50. if (!useAVL){
  51. document = new SPLAVL<string, int>();
  52. }
  53. else{
  54. document = new AVLTree<string, int>();
  55. }
  56. /*Loading algorithm:
  57. *1. Upload first n words determined by phraseSize
  58. * into a Circurlar Array List.
  59. *
  60. *2. Concatenate words into single phrase and
  61. * upload into BST
  62. *
  63. *3. Remove first word from phrase, inserts next word into phrase.
  64. *
  65. *4. Continues uploading phrases into BST until end of file.
  66. */
  67. List<string>* words = new CircularArrayList<string>();
  68. ifstream inFile;
  69. string word;
  70. string phrase;
  71. inFile.open(fileName.c_str());
  72. if (!inFile.is_open()) {
  73. cerr << "Specific file did not open." << endl;
  74. delete words;
  75. return document;
  76. }
  77. for (int i = 0; i < phraseSize; i++) {
  78. inFile >> word;
  79. words->insertAtTail(word);
  80. phrase = phrase + words->peekTail() + " ";
  81. }
  82. document->insert(phrase, 1);
  83. inFile >> word;
  84. while(!((inFile.eof()))){
  85. words->removeHead();
  86. phrase = "";
  87. words->insertAtTail(word);
  88. for (int i = 0; i < phraseSize; i++) {
  89. phrase = phrase + words->get(i) + " ";
  90. }
  91. if (!document->contains(phrase)) {
  92. document->insert(phrase, 1);
  93. }
  94. else {
  95. int value = document->find(phrase);
  96. document->update(phrase, value+1);
  97. }
  98. inFile >> word;
  99. }
  100. delete words;
  101. return document;
  102. }
  103. /*loadDocs
  104. *inputs: name of file containing list of documents,
  105. * useAVL, phraseSize
  106. *opens each document file and stores it as a BST.
  107. *return: list of all documents.
  108. */
  109. List < Pair<string, BST<string, int>* > >*
  110. loadDocs(string flist, bool useAVL, int phraseSize){
  111. ifstream inFile;
  112. string fileName;
  113. Pair< string, BST<string, int>* > currentDoc;
  114. List < Pair<string, BST<string, int>* > >* docList
  115. = new CircularArrayList< Pair<string, BST<string, int>* > >();
  116. inFile.open(flist.c_str());
  117. if (!inFile.is_open()) {
  118. cerr << "Unable to open file!" << endl;
  119. return docList;
  120. }
  121. inFile >> fileName;
  122. while(!inFile.eof()){
  123. currentDoc.first = fileName;
  124. currentDoc.second = storeDoc(fileName, useAVL, phraseSize);
  125. docList->insertAtTail(currentDoc);
  126. inFile >> fileName;
  127. }
  128. return docList;
  129. }
  130. /*cleanUp
  131. *inputs: list of pairs where each pair is <name of file, BST>
  132. *ensures all BSTs strored as second element in pair get deleted.
  133. */
  134. void cleanUp(List < Pair<string, BST<string, int>* > >* docList){
  135. BST<string, int>* BSTtoDelete;
  136. for (int i = 0; i < docList->getSize(); i++){
  137. BSTtoDelete = docList->get(i).second;
  138. delete BSTtoDelete;
  139. }
  140. }
  141. /*compare
  142. *inputs: Two BSTs representing two documents
  143. *compares the keys of one BST to the other, counting the number
  144. *of matches by referrencing the values of each node.
  145. *return: total number of matches.
  146. */
  147. int compare(BST<string, int>* doc1, BST<string, int>* doc2){
  148. Queue< Pair<string, int> >* phrasesToCompare;
  149. Pair<string, int> currentPair;
  150. int doc1Value, doc2Value;
  151. int matches = 0;
  152. phrasesToCompare = doc1->getInOrder();
  153. while(!phrasesToCompare->isEmpty()){
  154. currentPair = phrasesToCompare->dequeue();
  155. if (doc2->contains(currentPair.first)){
  156. doc1Value = currentPair.second;
  157. doc2Value = doc2->find(currentPair.first);
  158. if (doc2Value <= doc1Value){
  159. matches = matches + doc2Value;
  160. }
  161. else{
  162. matches = matches + doc1Value;
  163. }
  164. }
  165. }
  166. delete phrasesToCompare;
  167. return matches;
  168. }
  169. /*bestCompare
  170. *inputs: List of pairs where each pair is <filename,BST>
  171. *calls compare between each document, and
  172. *for each document i,
  173. * print name of document j with highest number of matches
  174. *print max height of all document BSTs
  175. *also print average height,size, and size to height ratio of all documents.
  176. */
  177. void bestCompare(List < Pair<string, BST<string, int>* > >* docList){
  178. BST<string, int>* doc1;
  179. BST<string, int>* doc2;
  180. int numDoc = docList->getSize();
  181. string bestFile = "";
  182. int bestMatch, currentMatch;
  183. float currentSize;
  184. int maxHeight, currentHeight;
  185. float totalHeight, totalSize, ratio, totalRatio;
  186. maxHeight = totalHeight = totalSize = currentSize =
  187. totalRatio = ratio = 0;
  188. for (int i = 0; i < numDoc; i++){
  189. doc1 = docList->get(i).second;
  190. currentHeight = doc1->getHeight();
  191. currentSize = doc1->getSize();
  192. if (currentSize != 0){
  193. if (currentHeight == 0){
  194. ratio = currentSize/1;
  195. }
  196. else{
  197. ratio = currentSize/currentHeight;
  198. }
  199. totalRatio = totalRatio + ratio;
  200. totalSize = totalSize + currentSize;
  201. totalHeight = totalHeight + currentHeight;
  202. bestMatch = 0;
  203. for (int j = 1; j < numDoc; j++){
  204. currentMatch = 0;
  205. doc2 = docList->get((i+j)%(numDoc)).second;
  206. if(currentHeight > maxHeight){
  207. maxHeight = doc1->getHeight();
  208. }
  209. currentMatch = compare(doc1, doc2);
  210. if (currentMatch >= bestMatch){
  211. bestMatch = currentMatch;
  212. bestFile = docList->get((i+j)%(numDoc)).first;
  213. }
  214. }
  215. cout << docList->get(i).first << "-->" << bestFile;
  216. cout << ": " << bestMatch << " matches" << endl;
  217. }
  218. }
  219. cout << endl << endl;
  220. cout << "Max height: " << maxHeight << endl;
  221. cout << "Average height: " << totalHeight/numDoc << endl;
  222. cout << "Average size: " << totalSize/numDoc << endl;
  223. cout << "Average size to height ratio: " << totalRatio/numDoc << endl;
  224. }