123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375 |
- $OpenBSD: patch-lib_search_c,v 1.2 2008/09/06 21:49:15 sthen Exp $
- --- lib/search.c.orig Wed Nov 29 21:02:21 2006
- +++ lib/search.c Sat Sep 6 22:44:37 2008
- @@ -13,6 +13,7 @@
- #include <stdlib.h>
- #include <string.h>
- #include <assert.h>
- +#include <limits.h>
-
- #include "wn.h"
-
- @@ -119,33 +120,22 @@ IndexPtr parse_index(long offset, int dbase, char *lin
- if ( !line )
- line = read_index( offset, indexfps[dbase] );
-
- - idx = (IndexPtr)malloc(sizeof(Index));
- + idx = (IndexPtr)calloc(1, sizeof(Index));
- assert(idx);
-
- /* set offset of entry in index file */
- idx->idxoffset = offset;
-
- - idx->wd='\0';
- - idx->pos='\0';
- - idx->off_cnt=0;
- - idx->tagged_cnt = 0;
- - idx->sense_cnt=0;
- - idx->offset='\0';
- - idx->ptruse_cnt=0;
- - idx->ptruse='\0';
- -
- /* get the word */
- ptrtok=strtok(line," \n");
-
- - idx->wd = malloc(strlen(ptrtok) + 1);
- + idx->wd = strdup(ptrtok);
- assert(idx->wd);
- - strcpy(idx->wd, ptrtok);
-
- /* get the part of speech */
- ptrtok=strtok(NULL," \n");
- - idx->pos = malloc(strlen(ptrtok) + 1);
- + idx->pos = strdup(ptrtok);
- assert(idx->pos);
- - strcpy(idx->pos, ptrtok);
-
- /* get the collins count */
- ptrtok=strtok(NULL," \n");
- @@ -154,7 +144,12 @@ IndexPtr parse_index(long offset, int dbase, char *lin
- /* get the number of pointers types */
- ptrtok=strtok(NULL," \n");
- idx->ptruse_cnt = atoi(ptrtok);
- -
- +
- + if (idx->ptruse_cnt < 0 || (unsigned int)idx->ptruse_cnt > UINT_MAX/sizeof(int)) {
- + free_index(idx);
- + return(NULL);
- + }
- +
- if (idx->ptruse_cnt) {
- idx->ptruse = (int *) malloc(idx->ptruse_cnt * (sizeof(int)));
- assert(idx->ptruse);
- @@ -173,9 +168,14 @@ IndexPtr parse_index(long offset, int dbase, char *lin
- /* get the number of senses that are tagged */
- ptrtok=strtok(NULL," \n");
- idx->tagged_cnt = atoi(ptrtok);
- -
- +
- + if (idx->off_cnt < 0 || (unsigned long)idx->off_cnt > ULONG_MAX/sizeof(long)) {
- + free_index(idx);
- + return(NULL);
- + }
- +
- /* make space for the offsets */
- - idx->offset = (long *) malloc(idx->off_cnt * (sizeof(long)));
- + idx->offset = (unsigned long *) malloc(idx->off_cnt * sizeof(long));
- assert(idx->offset);
-
- /* get the offsets */
- @@ -197,15 +197,21 @@ IndexPtr getindex(char *searchstr, int dbase)
- char strings[MAX_FORMS][WORDBUF]; /* vector of search strings */
- static IndexPtr offsets[MAX_FORMS];
- static int offset;
- -
- +
- /* This works like strrok(): if passed with a non-null string,
- prepare vector of search strings and offsets. If string
- is null, look at current list of offsets and return next
- one, or NULL if no more alternatives for this word. */
-
- if (searchstr != NULL) {
- + /* Bail out if the input is too long for us to handle */
- + if (strlen(searchstr) > (WORDBUF - 1)) {
- + strcpy(msgbuf, "WordNet library error: search term is too long\n");
- + display_message(msgbuf);
- + return(NULL);
- + }
-
- - offset = 0;
- + offset = 0;
- strtolower(searchstr);
- for (i = 0; i < MAX_FORMS; i++) {
- strcpy(strings[i], searchstr);
- @@ -229,11 +235,11 @@ IndexPtr getindex(char *searchstr, int dbase)
- /* Get offset of first entry. Then eliminate duplicates
- and get offsets of unique strings. */
-
- - if (strings[0][0] != NULL)
- + if (strings[0][0] != '\0') /* skip empty forms; strings[0] itself is an array and never NULL */
- offsets[0] = index_lookup(strings[0], dbase);
-
- for (i = 1; i < MAX_FORMS; i++)
- - if ((strings[i][0]) != NULL && (strcmp(strings[0], strings[i])))
- + if (strings[i][0] != '\0' && (strcmp(strings[0], strings[i])))
- offsets[i] = index_lookup(strings[i], dbase);
- }
-
- @@ -272,7 +278,7 @@ SynsetPtr read_synset(int dbase, long boffset, char *w
- SynsetPtr parse_synset(FILE *fp, int dbase, char *word)
- {
- static char line[LINEBUF];
- - char tbuf[SMLINEBUF];
- + char tbuf[SMLINEBUF] = "";
- char *ptrtok;
- char *tmpptr;
- int foundpert = 0;
- @@ -286,33 +292,11 @@ SynsetPtr parse_synset(FILE *fp, int dbase, char *word
- if ((tmpptr = fgets(line, LINEBUF, fp)) == NULL)
- return(NULL);
-
- - synptr = (SynsetPtr)malloc(sizeof(Synset));
- + synptr = (SynsetPtr)calloc(1, sizeof(Synset));
- assert(synptr);
- -
- - synptr->hereiam = 0;
- +
- synptr->sstype = DONT_KNOW;
- - synptr->fnum = 0;
- - synptr->pos = '\0';
- - synptr->wcount = 0;
- - synptr->words = '\0';
- - synptr->whichword = 0;
- - synptr->ptrcount = 0;
- - synptr->ptrtyp = '\0';
- - synptr->ptroff = '\0';
- - synptr->ppos = '\0';
- - synptr->pto = '\0';
- - synptr->pfrm = '\0';
- - synptr->fcount = 0;
- - synptr->frmid = '\0';
- - synptr->frmto = '\0';
- - synptr->defn = '\0';
- - synptr->key = 0;
- - synptr->nextss = NULL;
- - synptr->nextform = NULL;
- synptr->searchtype = -1;
- - synptr->ptrlist = NULL;
- - synptr->headword = NULL;
- - synptr->headsense = 0;
-
- ptrtok = line;
-
- @@ -322,7 +306,7 @@ SynsetPtr parse_synset(FILE *fp, int dbase, char *word
-
- /* sanity check - make sure starting file offset matches first field */
- if (synptr->hereiam != loc) {
- - sprintf(msgbuf, "WordNet library error: no synset at location %d\n",
- + sprintf(msgbuf, "WordNet library error: no synset at location %ld\n",
- loc);
- display_message(msgbuf);
- free(synptr);
- @@ -335,16 +319,20 @@ SynsetPtr parse_synset(FILE *fp, int dbase, char *word
-
- /* looking at POS */
- ptrtok = strtok(NULL, " \n");
- - synptr->pos = malloc(strlen(ptrtok) + 1);
- + synptr->pos = strdup(ptrtok);
- assert(synptr->pos);
- - strcpy(synptr->pos, ptrtok);
- if (getsstype(synptr->pos) == SATELLITE)
- synptr->sstype = INDIRECT_ANT;
-
- /* looking at numwords */
- ptrtok = strtok(NULL, " \n");
- synptr->wcount = strtol(ptrtok, NULL, 16);
- -
- +
- + if (synptr->wcount < 0 || (unsigned int)synptr->wcount > UINT_MAX/sizeof(char *)) {
- + free_syns(synptr);
- + return(NULL);
- + }
- +
- synptr->words = (char **)malloc(synptr->wcount * sizeof(char *));
- assert(synptr->words);
- synptr->wnsns = (int *)malloc(synptr->wcount * sizeof(int));
- @@ -354,9 +342,8 @@ SynsetPtr parse_synset(FILE *fp, int dbase, char *word
-
- for (i = 0; i < synptr->wcount; i++) {
- ptrtok = strtok(NULL, " \n");
- - synptr->words[i] = malloc(strlen(ptrtok) + 1);
- + synptr->words[i] = strdup(ptrtok);
- assert(synptr->words[i]);
- - strcpy(synptr->words[i], ptrtok);
-
- /* is this the word we're looking for? */
-
- @@ -371,6 +358,12 @@ SynsetPtr parse_synset(FILE *fp, int dbase, char *word
- ptrtok = strtok(NULL," \n");
- synptr->ptrcount = atoi(ptrtok);
-
- + /* Should we check for long here as well? */
- + if (synptr->ptrcount < 0 || (unsigned int)synptr->ptrcount > UINT_MAX/sizeof(int)) {
- + free_syns(synptr);
- + return(NULL);
- + }
- +
- if (synptr->ptrcount) {
-
- /* alloc storage for the pointers */
- @@ -455,21 +448,23 @@ SynsetPtr parse_synset(FILE *fp, int dbase, char *word
- ptrtok = strtok(NULL," \n");
- if (ptrtok) {
- ptrtok = strtok(NULL," \n");
- - sprintf(tbuf, "");
- while (ptrtok != NULL) {
- + if (strlen(ptrtok) + strlen(tbuf) + 1 + 1 > sizeof(tbuf)) {
- + free_syns(synptr);
- + return(NULL);
- + }
- strcat(tbuf,ptrtok);
- ptrtok = strtok(NULL, " \n");
- if(ptrtok)
- strcat(tbuf," ");
- }
- - assert((1 + strlen(tbuf)) < sizeof(tbuf));
- - synptr->defn = malloc(strlen(tbuf) + 4);
- + synptr->defn = malloc(strlen(tbuf) + 3);
- assert(synptr->defn);
- sprintf(synptr->defn,"(%s)",tbuf);
- }
-
- if (keyindexfp) { /* we have unique keys */
- - sprintf(tmpbuf, "%c:%8.8d", partchars[dbase], synptr->hereiam);
- + sprintf(tmpbuf, "%c:%8.8ld", partchars[dbase], synptr->hereiam);
- synptr->key = GetKeyForOffset(tmpbuf);
- }
-
- @@ -635,7 +630,7 @@ static void traceptrs(SynsetPtr synptr, int ptrtyp, in
-
- if ((ptrtyp == PERTPTR || ptrtyp == PPLPTR) &&
- synptr->pto[i] != 0) {
- - sprintf(tbuf, " (Sense %d)\n",
- + snprintf(tbuf, sizeof(tbuf), " (Sense %d)\n",
- cursyn->wnsns[synptr->pto[i] - 1]);
- printsynset(prefix, cursyn, tbuf, DEFOFF, synptr->pto[i],
- SKIP_ANTS, PRINT_MARKER);
- @@ -656,7 +651,7 @@ static void traceptrs(SynsetPtr synptr, int ptrtyp, in
- traceptrs(cursyn, HYPERPTR, getpos(cursyn->pos), 0);
- }
- } else if (ptrtyp == ANTPTR && dbase != ADJ && synptr->pto[i] != 0) {
- - sprintf(tbuf, " (Sense %d)\n",
- + snprintf(tbuf, sizeof(tbuf), " (Sense %d)\n",
- cursyn->wnsns[synptr->pto[i] - 1]);
- printsynset(prefix, cursyn, tbuf, DEFOFF, synptr->pto[i],
- SKIP_ANTS, PRINT_MARKER);
- @@ -817,7 +812,7 @@ static void tracenomins(SynsetPtr synptr, int dbase)
-
- cursyn = read_synset(synptr->ppos[i], synptr->ptroff[i], "");
-
- - sprintf(tbuf, "#%d\n",
- + snprintf(tbuf, sizeof(tbuf), "#%d\n",
- cursyn->wnsns[synptr->pto[i] - 1]);
- printsynset(prefix, cursyn, tbuf, DEFOFF, synptr->pto[i],
- SKIP_ANTS, SKIP_MARKER);
- @@ -989,12 +984,12 @@ void getexample(char *offset, char *wd)
- char sentbuf[512];
-
- if (vsentfilefp != NULL) {
- - if (line = bin_search(offset, vsentfilefp)) {
- + if ((line = bin_search(offset, vsentfilefp)) != NULL) {
- while(*line != ' ')
- line++;
-
- printbuffer(" EX: ");
- - sprintf(sentbuf, line, wd);
- + snprintf(sentbuf, sizeof(sentbuf), line, wd);
- printbuffer(sentbuf);
- }
- }
- @@ -1011,7 +1006,7 @@ int findexample(SynsetPtr synptr)
- if (vidxfilefp != NULL) {
- wdnum = synptr->whichword - 1;
-
- - sprintf(tbuf,"%s%%%-1.1d:%-2.2d:%-2.2d::",
- + snprintf(tbuf, sizeof(tbuf), "%s%%%-1.1d:%-2.2d:%-2.2d::",
- synptr->words[wdnum],
- getpos(synptr->pos),
- synptr->fnum,
- @@ -1124,7 +1119,7 @@ static void freq_word(IndexPtr index)
- if (cnt >= 17 && cnt <= 32) familiar = 6;
- if (cnt > 32 ) familiar = 7;
-
- - sprintf(tmpbuf,
- + snprintf(tmpbuf, sizeof(tmpbuf),
- "\n%s used as %s is %s (polysemy count = %d)\n",
- index->wd, a_an[getpos(index->pos)], freqcats[familiar], cnt);
- printbuffer(tmpbuf);
- @@ -1147,6 +1142,9 @@ void wngrep (char *word_passed, int pos) {
- }
- rewind(inputfile);
-
- + if (strlen(word_passed) + 1 > sizeof(word))
- + return;
- +
- strcpy (word, word_passed);
- ToLowerCase(word); /* map to lower case for index file search */
- strsubst (word, ' ', '_'); /* replace spaces with underscores */
- @@ -1169,7 +1167,7 @@ void wngrep (char *word_passed, int pos) {
- ((line[loc + wordlen] == '-') || (line[loc + wordlen] == '_')))
- ) {
- strsubst (line, '_', ' ');
- - sprintf (tmpbuf, "%s\n", line);
- + snprintf (tmpbuf, sizeof(tmpbuf), "%s\n", line);
- printbuffer (tmpbuf);
- break;
- }
- @@ -1570,7 +1568,8 @@ char *findtheinfo(char *searchstr, int dbase, int ptrt
- bufstart[0] = '\n';
- bufstart++;
- }
- - strncpy(bufstart, tmpbuf, strlen(tmpbuf));
- + /* Don't include the \0 */
- + memcpy(bufstart, tmpbuf, strlen(tmpbuf));
- bufstart = searchbuffer + strlen(searchbuffer);
- }
- }
- @@ -1683,9 +1682,8 @@ SynsetPtr traceptrs_ds(SynsetPtr synptr, int ptrtyp, i
- cursyn = read_synset(synptr->ppos[i],
- synptr->ptroff[i],
- "");
- - synptr->headword = malloc(strlen(cursyn->words[0]) + 1);
- + synptr->headword = strdup(cursyn->words[0]);
- assert(synptr->headword);
- - strcpy(synptr->headword, cursyn->words[0]);
- synptr->headsense = cursyn->lexid[0];
- free_synset(cursyn);
- break;
- @@ -2013,7 +2011,7 @@ static int getsearchsense(SynsetPtr synptr, int whichw
- strsubst(strcpy(wdbuf, synptr->words[whichword - 1]), ' ', '_');
- strtolower(wdbuf);
-
- - if (idx = index_lookup(wdbuf, getpos(synptr->pos))) {
- + if ((idx = index_lookup(wdbuf, getpos(synptr->pos))) != NULL) {
- for (i = 0; i < idx->off_cnt; i++)
- if (idx->offset[i] == synptr->hereiam) {
- free_index(idx);
- @@ -2037,7 +2035,7 @@ static void printsynset(char *head, SynsetPtr synptr,
- by flags */
-
- if (offsetflag) /* print synset offset */
- - sprintf(tbuf + strlen(tbuf),"{%8.8d} ", synptr->hereiam);
- + sprintf(tbuf + strlen(tbuf),"{%8.8ld} ", synptr->hereiam);
- if (fileinfoflag) { /* print lexicographer file information */
- sprintf(tbuf + strlen(tbuf), "<%s> ", lexfiles[synptr->fnum]);
- prlexid = 1; /* print lexicographer id after word */
- @@ -2072,7 +2070,7 @@ static void printantsynset(SynsetPtr synptr, char *tai
- tbuf[0] = '\0';
-
- if (offsetflag)
- - sprintf(tbuf,"{%8.8d} ", synptr->hereiam);
- + sprintf(tbuf,"{%8.8ld} ", synptr->hereiam);
- if (fileinfoflag) {
- sprintf(tbuf + strlen(tbuf),"<%s> ", lexfiles[synptr->fnum]);
- prlexid = 1;
|