list.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405
  1. #define _GNU_SOURCE 1
  2. #include <stdio.h>
  3. #include <stdlib.h> // malloc/calloc
  4. #include <stdbool.h>
  5. #include <regex.h>
  6. #include <string.h>
  7. #include <assert.h>
  8. #define MAX_VARIABLE_NAME 16
  9. #define MAX_LINE_LENGTH 128
  10. #define VARIABLE_BASE_LENGTH 6
  11. //struct declarations
  12. //2 dimensional nodes
  13. // aka node1 -> node2 -> node3 -> node4 -> etc.
  14. // || || || ||
  15. // \/ \/ \/ \/
  16. // node1 node2 node2 node3
  17. typedef struct d2node * d2nodeptr;
  18. typedef struct d2node {
  19. bool multiple_variables;
  20. struct d2node * right; // the next node
  21. struct d2node * left;
  22. struct d2node * bottom; // this is the start of another linear list. Each item in the list contains a reference to the variable name
  23. struct d2node * top; //this is how one can crawl back up the 2 dimensional node
  24. char variable_name [MAX_VARIABLE_NAME];
  25. } d2node;
  26. /* this will always point at the beginning of the d2node datastructure */
  27. d2nodeptr nodep = NULL;
  28. d2nodeptr temp_nodep = NULL;
  29. /* technically this is just lazy code. but who cares? */
  30. void *
  31. xmalloc (size_t size)
  32. {
  33. void *value = malloc (size);
  34. assert(value != NULL);
  35. /* if (value == 0) */
  36. /* fatal ("virtual memory exhausted"); */
  37. return value;
  38. }
  39. d2nodeptr
  40. allocate_new_d2node ()
  41. {
  42. //FIXME why do I need to say struct d2node here?
  43. d2nodeptr d2nodep = (d2nodeptr) xmalloc (sizeof (struct d2node));
  44. d2nodep->multiple_variables = false;
  45. /* initialize the d2node's pointer's to NULL */
  46. d2nodep->right = d2nodep->left = d2nodep->bottom = NULL;
  47. /* d2nodep->multiple_variables = NULL; */
  48. return d2nodep;
  49. }
  50. /* this returns the address of the first created
  51. d2node.
  52. */
  53. d2nodeptr
  54. address_of_first_d2node (d2nodeptr nodep)
  55. {
  56. // if there is a node on top, then go up
  57. /*
  58. * node
  59. * ||
  60. * \/
  61. * node
  62. */
  63. while (nodep->top)
  64. {
  65. nodep = nodep->top;
  66. }
  67. /* if there is a node to the left, go there
  68. *
  69. * node => node => node
  70. *
  71. */
  72. while (nodep->left)
  73. {
  74. nodep = nodep->left;
  75. }
  76. return nodep;
  77. }
  78. /* Returns the start of the variable name
  79. ** input: "int purple = 5;"
  80. ** return: pointer to ^
  81. */
  82. // this function could probably be replace by a regex
  83. //THIS function is also NOT working. So that's annoying
  84. //It also causes length_of_varable name to not work either
  85. char * start_of_variable_name (char * line)
  86. {
  87. //if there is an "=" on this line:
  88. //find where the = is; move backwards until you see a " ", then once more
  89. // for another " ", then return that position plus one
  90. char * position_of_equal = (char *) memchr (line, '=', strlen(line) - 1);
  91. char * position_of_colan = (char *) memchr (line, ';', strlen(line) - 1);
  92. char * p_start_of_variable_name;
  93. if (position_of_equal) //then this is a variable definition
  94. {
  95. char * start_of_space_after_variable_name =
  96. (char *) memrchr (position_of_equal, ' ', sizeof (char));
  97. // we have to search backwards once more
  98. p_start_of_variable_name =
  99. (char *) memrchr (start_of_space_after_variable_name,
  100. ' ', sizeof (char)) + 1;
  101. }
  102. else //then this line is a variable declaration
  103. {
  104. p_start_of_variable_name =
  105. (char *) memrchr (position_of_colan, ' ', sizeof (char)) - 1;
  106. }
  107. return p_start_of_variable_name;
  108. }
  109. /* Returns the end of the variable name
  110. ** input: "int purple = 5;"
  111. ** return: pointer to ^
  112. */
  113. char * end_of_variable_name (char * line)
  114. {
  115. //If of the form "int purple = 5;"
  116. char * position_of_equal = (char *) memchr (line, '=', sizeof(char));
  117. if (position_of_equal)
  118. {
  119. char * start_of_space_before_variable_name =
  120. (char *) memrchr (position_of_equal, ' ', sizeof (char));
  121. return start_of_space_before_variable_name + 1;
  122. }
  123. }
  124. int length_of_variable_name (char * start, char * end)
  125. {
  126. int i;
  127. for (i = 0; start != end; i++)
  128. {
  129. start++;
  130. }
  131. return i;
  132. }
  133. /* This function checks the string against our common
  134. * variable name bases. If we have seen this variable's
  135. * base name (the first 6 letters) before, then we
  136. * return the pointer to the beginning of the column
  137. * where it exists
  138. * ie: if variable name is purple_7 and the 2 dimensional
  139. * nodes look like
  140. * blacke => greens => purple
  141. * || || ||
  142. * \/ \/ \/
  143. * blacke_s greens_a purple_1
  144. *
  145. * then this function will return the d2nodeptr to the
  146. * "purple" node
  147. **/
  148. d2nodeptr
  149. is_this_variable_base_unique (char * string)
  150. {
  151. extern d2nodeptr nodep;
  152. d2nodeptr temp_nodep = nodep;
  153. /* while the current variable name doesn't match the current node's base name? */
  154. /* this line doesn't appear to be working... */
  155. while (memcmp(string, temp_nodep->variable_name, VARIABLE_BASE_LENGTH) != 0)
  156. {
  157. //if there is no right node, then there is this is a new variable base
  158. //name
  159. if (!temp_nodep->right)
  160. return NULL;
  161. //if there is a node-right, then check it's name against the current
  162. //variable
  163. temp_nodep = temp_nodep->right;
  164. }
  165. //if control flow reached here, then we have a match!
  166. //return the ptr to the current node that has a common base name!
  167. return temp_nodep;
  168. }
  169. /* */
  170. void
  171. add_new_variable_base (char * string, int length)
  172. {
  173. extern d2nodeptr nodep;
  174. d2nodeptr temp_nodep = allocate_new_d2node ();
  175. memcpy (temp_nodep->variable_name, string, length);
  176. temp_nodep->variable_name[length] = '\0';
  177. /* add this new nodep to the front of the d2nodes */
  178. /*
  179. ie: if the current 2d2 nodes looks like
  180. purple => greens => hammar
  181. * and the new variable base is "colors"
  182. * then make the d2node structure look like
  183. * colors => purple => greens => hammar
  184. */
  185. /* if the current nodep has a valid address, but is empty
  186. ** then initialize nodep.
  187. */
  188. if (nodep && nodep->variable_name[0] == '\0')
  189. {
  190. /* This is sloppy coding.
  191. * It discards the reference to the original and unused region of memory
  192. * that nodep originally pointed to.
  193. */
  194. nodep = temp_nodep;
  195. }
  196. else
  197. {
  198. temp_nodep->right = nodep;
  199. nodep = temp_nodep;
  200. }
  201. }
  202. /*
  203. * this function should be called after is_this_variable_base_unique
  204. * this function checks the current
  205. *
  206. */
  207. d2nodeptr is_this_a_new_variable (char * string, int length)
  208. {
  209. extern d2nodeptr temp_nodep;
  210. /* while the current variable name doesn't match the current node's
  211. base name... */
  212. while (memcmp(string, temp_nodep->variable_name, length) != 0)
  213. {
  214. //if there is no bottom node, then this is a new variable
  215. //return so, add_new_variable_at_base can add the variable
  216. //to the current column
  217. if (!temp_nodep->bottom)
  218. return temp_nodep;
  219. //if there is a node-right, then check it's name against the current
  220. //variable
  221. temp_nodep = temp_nodep->bottom;
  222. }
  223. //if control flow reached here, then we have already seen this variable.
  224. //tell the caller that we should not add it again.
  225. return NULL;
  226. }
  227. void
  228. store_c_file_common_variables (FILE * input_file_stream) {
  229. // make a regexp to search for variables, and I will search by line
  230. // int purple_variable = 5;
  231. // int purple_carrot = 10;
  232. // int purdue_sunshine = 1;
  233. // "int [a-zA-Z][a-zA-Z0-9_]* += +[0-9]+;"
  234. // look for the first common characters.
  235. regmatch_t matchptr [12];
  236. regex_t regex_c_variable_declaration;
  237. //since we are tokenizing "<" and ">", some of the tokens will be the
  238. //whitespace plus newlines between <div>s. So I need a way to ignore
  239. //those whitespace tokens
  240. int regcompile_flags = REG_EXTENDED|REG_NOSUB;
  241. //this is not a complete regex. There are some elements that look like
  242. //it contains spaces...
  243. int error = regcomp (&regex_c_variable_declaration,
  244. /* this is a butter regexp, but it's not working properly yet */
  245. /* "^ *[a-zA-Z0-9]+ +[a-zA-Z0-9]=[a-zA-Z0-9]$", */
  246. /* "char purple*", */
  247. /* "^ *(char|int) +[a-zA-Z0-9_]+ += .*;$", */
  248. /* "^ *char +[a-zA-Z0-9_]+ += .*;$", */
  249. "^ *(char|int|float|double) *[a-zA-Z0-9_]+",
  250. regcompile_flags);
  251. if (error != 0)
  252. {
  253. char string[50];
  254. regerror (error, &regex_c_variable_declaration, string,
  255. sizeof(char) * 50);
  256. }
  257. size_t length = 0;
  258. char * line = NULL;
  259. size_t nread;
  260. char * p_start_of_variable_name = NULL;
  261. char * p_end_of_variable_name = NULL;
  262. int i_length_of_variable_name = 0;
  263. /* this will always point to the 1st d2node */
  264. extern d2nodeptr nodep;
  265. extern d2nodeptr temp_nodep;
  266. nodep = allocate_new_d2node ();
  267. temp_nodep = nodep;
  268. /* this variable will traverse through the node datastructure */
  269. //loop through the lines of the file
  270. while ((nread = getline(&line, &length, input_file_stream)) != -1) {
  271. //if the current line has a variable declaration...
  272. //printf("retrieved line of length %zu:\n", nread);
  273. if (regexec (&regex_c_variable_declaration, line, 0, 0, 0) == 0)
  274. {
  275. //fwrite(line, nread, 1, stdout);
  276. // get the start of the variable name
  277. p_start_of_variable_name = start_of_variable_name (line);
  278. p_end_of_variable_name = end_of_variable_name (line);
  279. i_length_of_variable_name =
  280. length_of_variable_name (p_start_of_variable_name,
  281. p_end_of_variable_name);
  282. /* we are interested in variable names longer than 6 */
  283. if (i_length_of_variable_name < 6)
  284. continue;
  285. temp_nodep = nodep;
  286. temp_nodep = is_this_variable_base_unique
  287. (p_start_of_variable_name);
  288. if (!temp_nodep) //if this is a new variable base name, then add it
  289. {
  290. add_new_variable_base (p_start_of_variable_name,
  291. i_length_of_variable_name);
  292. continue;
  293. }
  294. /*
  295. * if control reaches here, then we have seen this variable base
  296. * before. temp_nodep now points to the column of common base names
  297. * let's check to see if we have seen this variable name before.
  298. *
  299. */
  300. temp_nodep = is_this_a_new_variable (p_start_of_variable_name,
  301. i_length_of_variable_name);
  302. /* if we get a pointer, then that is where we need to add the new variable */
  303. if (temp_nodep)
  304. {
  305. //temp_nodep now points to the bottom of the column of the variable base
  306. //so we need to add a new variable name under it.
  307. temp_nodep->bottom = allocate_new_d2node ();
  308. //If I change this to strcpy, then I don't need i_length_of_variable_name anymore?
  309. memcpy (temp_nodep->bottom->variable_name, p_start_of_variable_name,
  310. i_length_of_variable_name);
  311. nodep->variable_name[i_length_of_variable_name] = '\0';
  312. }
  313. /* if control flow reaches here, then we have seen this variable name before.
  314. ** So there's no need to do anything. :)
  315. */
  316. /* fwrite (line + p_start_of_variable_name, 1, */
  317. /* i_length_of_variable_name, stdout); */
  318. /* printf("\n"); */
  319. //copy the variable name into nodep
  320. /* memcpy (nodep->variable_name, p_start_of_variable_name, */
  321. /* i_length_of_variable_name); */
  322. // add a null terminating byte at end of string
  323. //this makes it easy to do printf to the variable_name later on
  324. /* nodep->variable_name[i_length_of_variable_name] = '\0'; */
  325. /* fwrite (nodep->variable_name, 1, i_length_of_variable_name, stdout); */
  326. /* printf("%s\n", nodep->variable_name); */
  327. }
  328. }
  329. }
  330. void
  331. print_common_base_name_variables ()
  332. {
  333. extern d2nodeptr nodep, temp_nodep;
  334. while (temp_nodep)
  335. {
  336. printf("\t%s\n", temp_nodep->variable_name);
  337. temp_nodep = temp_nodep->bottom;
  338. }
  339. }
  340. /*
  341. ** This function should be called after, store_c_file_common_variables
  342. **
  343. ** It prints all the variable names in the 2deminsional datastructure.
  344. **
  345. ** It is a naive implementation. nodep ends up pointing at the end of the
  346. ** datastructure. So you cannot call this function twice
  347. **
  348. ** THE PROBLEM IS HERE. FIXME! The store variables datastrure is storing data
  349. ** fine. BUT THIS function is NOT PRINTING THEM!
  350. **
  351. */
  352. void
  353. print_c_file_common_variables ()
  354. {
  355. extern d2nodeptr nodep;
  356. extern d2nodeptr temp_nodep;
  357. temp_nodep = nodep;
  358. //while there is another common base to print, or more of the current
  359. //common base to print, keep printing
  360. do
  361. {
  362. printf ("The common base of '");
  363. fwrite(nodep->variable_name, VARIABLE_BASE_LENGTH, 1, stdout);
  364. printf("' has these members:\n");
  365. print_common_base_name_variables();
  366. temp_nodep = nodep->right;
  367. nodep = nodep->right;
  368. //if there's another list of variables with a new common base
  369. //then set up the loop to print them.
  370. //If I make this nodep->right, there's a big error...
  371. } while (nodep);
  372. }