/* Groups the variable names declared in a C source file by the first
 * VARIABLE_BASE_LENGTH characters of each name, and prints the groups. */
#define _GNU_SOURCE 1
#include <assert.h>
#include <ctype.h>
#include <regex.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h> // malloc/calloc
#include <string.h>
/* Size in bytes of d2node.variable_name, terminating NUL included. */
#define MAX_VARIABLE_NAME 16
/* Intended upper bound on an input line; not referenced by the code
   visible in this part of the file. */
#define MAX_LINE_LENGTH 128
/* Number of leading characters that form a variable's "base" name. */
#define VARIABLE_BASE_LENGTH 6
- //struct declarations
- //2 dimensional nodes
- // aka node1 -> node2 -> node3 -> node4 -> etc.
- // || || || ||
- // \/ \/ \/ \/
- // node1 node2 node2 node3
- typedef struct d2node * d2nodeptr;
- typedef struct d2node {
- bool multiple_variables;
- struct d2node * right; // the next node
- struct d2node * left;
- struct d2node * bottom; // this is the start of another linear list. Each item in the list contains a reference to the variable name
- struct d2node * top; //this is how one can crawl back up the 2 dimensional node
- char variable_name [MAX_VARIABLE_NAME];
- } d2node;
- /* this will always point at the beginning of the d2node datastructure */
- d2nodeptr nodep = NULL;
- d2nodeptr temp_nodep = NULL;
/*
 * Allocate SIZE bytes or terminate the program.
 *
 * Never returns NULL: on allocation failure a message is written to
 * stderr and abort() is called.  The check is a real `if`, not an
 * assert(), so it is still performed when NDEBUG is defined.
 *
 * The caller owns the returned memory and releases it with free().
 */
void *
xmalloc (size_t size)
{
  /* malloc (0) is allowed to return NULL without being out of memory;
     request one byte instead so that NULL always means failure.  */
  void *value = malloc (size != 0 ? size : 1);

  if (value == NULL)
    {
      fputs ("virtual memory exhausted\n", stderr);
      abort ();
    }
  return value;
}
-
- d2nodeptr
- allocate_new_d2node ()
- {
- //FIXME why do I need to say struct d2node here?
- d2nodeptr d2nodep = (d2nodeptr) xmalloc (sizeof (struct d2node));
- d2nodep->multiple_variables = false;
- /* initialize the d2node's pointer's to NULL */
- d2nodep->right = d2nodep->left = d2nodep->bottom = NULL;
- /* d2nodep->multiple_variables = NULL; */
- return d2nodep;
- }
- /* this returns the address of the first created
- d2node.
- */
- d2nodeptr
- address_of_first_d2node (d2nodeptr nodep)
- {
- // if there is a node on top, then go up
- /*
- * node
- * ||
- * \/
- * node
- */
- while (nodep->top)
- {
- nodep = nodep->top;
- }
- /* if there is a node to the left, go there
- *
- * node => node => node
- *
- */
- while (nodep->left)
- {
- nodep = nodep->left;
- }
- return nodep;
- }
/* True when C may appear inside a C identifier. */
static int
is_identifier_char (char c)
{
  return c == '_' || isalnum ((unsigned char) c);
}

/*
 * Return a pointer to the first character of the variable name in LINE.
 *
 *   input:  "int purple = 5;"
 *   return:      ^  (points at the 'p')
 *
 * The name is taken to be the run of identifier characters that ends
 * immediately before the first '=' (a definition), or else before the
 * first ';' (a plain declaration), or else before the end of the text,
 * ignoring whitespace in between.  Only the last declarator before that
 * point is considered, so for "int a, b;" the result points at "b".
 *
 * Returns NULL when LINE is NULL.  When no identifier precedes the
 * delimiter (for example "char buf[10];") the result points at the
 * delimiter side of the text, i.e. at a zero-length name.
 */
char * start_of_variable_name (char * line)
{
  if (line == NULL)
    return NULL;

  /* Locate the character that terminates the declarator.  */
  char *end = strchr (line, '=');
  if (end == NULL)
    end = strchr (line, ';');
  if (end == NULL)
    end = line + strlen (line);

  /* Drop whitespace (including a trailing newline) before it.  */
  while (end > line && isspace ((unsigned char) end[-1]))
    end--;

  /* Walk back over the identifier itself.  */
  char *start = end;
  while (start > line && is_identifier_char (start[-1]))
    start--;

  return start;
}
/*
 * Return a pointer one past the last character of the variable name in
 * LINE, so that (end - start_of_variable_name (line)) is the name length.
 *
 *   input:  "int purple = 5;"
 *   return:            ^  (points at the space after "purple")
 *
 * The name is assumed to end right before the first '=' (a definition),
 * or else the first ';' (a plain declaration), or else the end of the
 * text; whitespace before that delimiter is skipped.
 *
 * Returns NULL when LINE is NULL.  Every other path returns a pointer
 * inside LINE (or at its terminating NUL).
 */
char * end_of_variable_name (char * line)
{
  if (line == NULL)
    return NULL;

  /* Locate the character that terminates the declarator.  */
  char *end = strchr (line, '=');
  if (end == NULL)
    end = strchr (line, ';');
  if (end == NULL)
    end = line + strlen (line);

  /* Back up over whitespace (including a trailing newline).  */
  while (end > line && isspace ((unsigned char) end[-1]))
    end--;

  return end;
}
/*
 * Return the number of characters in the half-open range [START, END).
 *
 * Both pointers must refer to the same string.  Returns 0 when either
 * pointer is NULL or when END lies before START, instead of walking off
 * the end of the buffer.
 */
int length_of_variable_name (char * start, char * end)
{
  if (start == NULL || end == NULL || end < start)
    return 0;
  return (int) (end - start);
}
- /* This function checks the string against our common
- * variable name bases. If we have seen this variable's
- * base name (the first 6 letters) before, then we
- * return the pointer to the beginning of the column
- * where it exists
- * ie: if variable name is purple_7 and the 2 dimensional
- * nodes look like
- * blacke => greens => purple
- * || || ||
- * \/ \/ \/
- * blacke_s greens_a purple_1
- *
- * then this function will return the d2nodeptr to the
- * "purple" node
- **/
- d2nodeptr
- is_this_variable_base_unique (char * string)
- {
- extern d2nodeptr nodep;
- d2nodeptr temp_nodep = nodep;
- /* while the current variable name doesn't match the current node's base name? */
- /* this line doesn't appear to be working... */
- while (memcmp(string, temp_nodep->variable_name, VARIABLE_BASE_LENGTH) != 0)
- {
- //if there is no right node, then there is this is a new variable base
- //name
- if (!temp_nodep->right)
- return NULL;
- //if there is a node-right, then check it's name against the current
- //variable
- temp_nodep = temp_nodep->right;
- }
- //if control flow reached here, then we have a match!
- //return the ptr to the current node that has a common base name!
- return temp_nodep;
- }
- /* */
- void
- add_new_variable_base (char * string, int length)
- {
- extern d2nodeptr nodep;
- d2nodeptr temp_nodep = allocate_new_d2node ();
- memcpy (temp_nodep->variable_name, string, length);
- temp_nodep->variable_name[length] = '\0';
- /* add this new nodep to the front of the d2nodes */
- /*
- ie: if the current 2d2 nodes looks like
- purple => greens => hammar
- * and the new variable base is "colors"
- * then make the d2node structure look like
- * colors => purple => greens => hammar
- */
- /* if the current nodep has a valid address, but is empty
- ** then initialize nodep.
- */
- if (nodep && nodep->variable_name[0] == '\0')
- {
- /* This is sloppy coding.
- * It discards the reference to the original and unused region of memory
- * that nodep originally pointed to.
- */
- nodep = temp_nodep;
- }
- else
- {
- temp_nodep->right = nodep;
- nodep = temp_nodep;
- }
- }
- /*
- * this function should be called after is_this_variable_base_unique
- * this function checks the current
- *
- */
- d2nodeptr is_this_a_new_variable (char * string, int length)
- {
- extern d2nodeptr temp_nodep;
- /* while the current variable name doesn't match the current node's
- base name... */
- while (memcmp(string, temp_nodep->variable_name, length) != 0)
- {
- //if there is no bottom node, then this is a new variable
- //return so, add_new_variable_at_base can add the variable
- //to the current column
- if (!temp_nodep->bottom)
- return temp_nodep;
- //if there is a node-right, then check it's name against the current
- //variable
- temp_nodep = temp_nodep->bottom;
- }
- //if control flow reached here, then we have already seen this variable.
- //tell the caller that we should not add it again.
- return NULL;
- }
- void
- store_c_file_common_variables (FILE * input_file_stream) {
- // make a regexp to search for variables, and I will search by line
- // int purple_variable = 5;
- // int purple_carrot = 10;
- // int purdue_sunshine = 1;
- // "int [a-zA-Z][a-zA-Z0-9_]* += +[0-9]+;"
- // look for the first common characters.
- regmatch_t matchptr [12];
- regex_t regex_c_variable_declaration;
- //since we are tokenizing "<" and ">", some of the tokens will be the
- //whitespace plus newlines between <div>s. So I need a way to ignore
- //those whitespace tokens
- int regcompile_flags = REG_EXTENDED|REG_NOSUB;
- //this is not a complete regex. There are some elements that look like
- //it contains spaces...
- int error = regcomp (®ex_c_variable_declaration,
- /* this is a butter regexp, but it's not working properly yet */
- /* "^ *[a-zA-Z0-9]+ +[a-zA-Z0-9]=[a-zA-Z0-9]$", */
- /* "char purple*", */
- /* "^ *(char|int) +[a-zA-Z0-9_]+ += .*;$", */
- /* "^ *char +[a-zA-Z0-9_]+ += .*;$", */
- "^ *(char|int|float|double) *[a-zA-Z0-9_]+",
- regcompile_flags);
- if (error != 0)
- {
- char string[50];
- regerror (error, ®ex_c_variable_declaration, string,
- sizeof(char) * 50);
- }
- size_t length = 0;
- char * line = NULL;
- size_t nread;
- char * p_start_of_variable_name = NULL;
- char * p_end_of_variable_name = NULL;
- int i_length_of_variable_name = 0;
- /* this will always point to the 1st d2node */
- extern d2nodeptr nodep;
- extern d2nodeptr temp_nodep;
- nodep = allocate_new_d2node ();
- temp_nodep = nodep;
- /* this variable will traverse through the node datastructure */
- //loop through the lines of the file
- while ((nread = getline(&line, &length, input_file_stream)) != -1) {
- //if the current line has a variable declaration...
- //printf("retrieved line of length %zu:\n", nread);
- if (regexec (®ex_c_variable_declaration, line, 0, 0, 0) == 0)
- {
- //fwrite(line, nread, 1, stdout);
- // get the start of the variable name
- p_start_of_variable_name = start_of_variable_name (line);
- p_end_of_variable_name = end_of_variable_name (line);
- i_length_of_variable_name =
- length_of_variable_name (p_start_of_variable_name,
- p_end_of_variable_name);
- /* we are interested in variable names longer than 6 */
- if (i_length_of_variable_name < 6)
- continue;
- temp_nodep = nodep;
- temp_nodep = is_this_variable_base_unique
- (p_start_of_variable_name);
- if (!temp_nodep) //if this is a new variable base name, then add it
- {
- add_new_variable_base (p_start_of_variable_name,
- i_length_of_variable_name);
- continue;
- }
- /*
- * if control reaches here, then we have seen this variable base
- * before. temp_nodep now points to the column of common base names
- * let's check to see if we have seen this variable name before.
- *
- */
- temp_nodep = is_this_a_new_variable (p_start_of_variable_name,
- i_length_of_variable_name);
- /* if we get a pointer, then that is where we need to add the new variable */
- if (temp_nodep)
- {
- //temp_nodep now points to the bottom of the column of the variable base
- //so we need to add a new variable name under it.
- temp_nodep->bottom = allocate_new_d2node ();
- //If I change this to strcpy, then I don't need i_length_of_variable_name anymore?
- memcpy (temp_nodep->bottom->variable_name, p_start_of_variable_name,
- i_length_of_variable_name);
- nodep->variable_name[i_length_of_variable_name] = '\0';
- }
- /* if control flow reaches here, then we have seen this variable name before.
- ** So there's no need to do anything. :)
- */
- /* fwrite (line + p_start_of_variable_name, 1, */
- /* i_length_of_variable_name, stdout); */
- /* printf("\n"); */
- //copy the variable name into nodep
- /* memcpy (nodep->variable_name, p_start_of_variable_name, */
- /* i_length_of_variable_name); */
- // add a null terminating byte at end of string
- //this makes it easy to do printf to the variable_name later on
- /* nodep->variable_name[i_length_of_variable_name] = '\0'; */
- /* fwrite (nodep->variable_name, 1, i_length_of_variable_name, stdout); */
- /* printf("%s\n", nodep->variable_name); */
- }
- }
- }
- void
- print_common_base_name_variables ()
- {
- extern d2nodeptr nodep, temp_nodep;
- while (temp_nodep)
- {
- printf("\t%s\n", temp_nodep->variable_name);
- temp_nodep = temp_nodep->bottom;
- }
- }
- /*
- ** This function should be called after, store_c_file_common_variables
- **
- ** It prints all the variable names in the 2deminsional datastructure.
- **
- ** It is a naive implementation. nodep ends up pointing at the end of the
- ** datastructure. So you cannot call this function twice
- **
- ** THE PROBLEM IS HERE. FIXME! The store variables datastrure is storing data
- ** fine. BUT THIS function is NOT PRINTING THEM!
- **
- */
- void
- print_c_file_common_variables ()
- {
- extern d2nodeptr nodep;
- extern d2nodeptr temp_nodep;
- temp_nodep = nodep;
-
- //while there is another common base to print, or more of the current
- //common base to print, keep printing
- do
- {
- printf ("The common base of '");
- fwrite(nodep->variable_name, VARIABLE_BASE_LENGTH, 1, stdout);
- printf("' has these members:\n");
- print_common_base_name_variables();
- temp_nodep = nodep->right;
- nodep = nodep->right;
- //if there's another list of variables with a new common base
- //then set up the loop to print them.
- //If I make this nodep->right, there's a big error...
- } while (nodep);
- }
|