12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482 |
/* This is the Assembler Pre-Processor
   Copyright (C) 1987-2015 Free Software Foundation, Inc.

   This file is part of GAS, the GNU Assembler.

   GAS is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GAS is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GAS; see the file COPYING.  If not, write to the Free
   Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
   02110-1301, USA.  */

/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90.  */
/* App, the assembler pre-processor.  This pre-processor strips out
   excess spaces, turns single-quoted characters into a decimal
   constant, and turns the # in # <number> <filename> <garbage> into a
   .linefile.  This needs better error-handling.  */
#include "as.h"
/* Pre-ANSI compilers do not understand `const'; define it away for
   them unless something has already done so.  */
#if (__STDC__ != 1)
#ifndef const
#define const  /* empty */
#endif
#endif

#ifdef H_TICK_HEX
/* When nonzero, do_scrub_begin classifies 'h' and 'H' as LEX_IS_H.  */
int enable_h_tick_hex = 0;
#endif

#ifdef TC_M68K
/* Whether we are scrubbing in m68k MRI mode.  This is different from
   flag_m68k_mri, because the two flags will be affected by the .mri
   pseudo-op at different times.  */
static int scrub_m68k_mri;

/* The pseudo-op which switches in and out of MRI mode.  See the
   comment in do_scrub_chars.  */
static const char mri_pseudo[] = ".mri 0";
#else
/* Targets other than m68k never scrub in MRI mode; making this a
   constant lets the MRI-only branches compile away.  */
#define scrub_m68k_mri 0
#endif

#if defined TC_ARM && defined OBJ_ELF
/* The pseudo-op for which we need to special-case `@' characters.
   See the comment in do_scrub_chars.  */
static const char symver_pseudo[] = ".symver";

/* Progress through symver_pseudo while matching it against the
   input; NULL when no match is in progress.  */
static const char * symver_state;
#endif
/* Character classification table, indexed by a character value in
   the range 0..255.  It is filled in by do_scrub_begin; an entry of
   zero means the character has no special class.  */
static char lex[256];

/* Characters which are always symbol components.  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* The classes stored in lex[].  The value 7 is not used.  */
#define LEX_IS_SYMBOL_COMPONENT		1
#define LEX_IS_WHITESPACE		2
#define LEX_IS_LINE_SEPARATOR		3
#define LEX_IS_COMMENT_START		4
#define LEX_IS_LINE_COMMENT_START	5
#define	LEX_IS_TWOCHAR_COMMENT_1ST	6
#define	LEX_IS_STRINGQUOTE		8
#define	LEX_IS_COLON			9
#define	LEX_IS_NEWLINE			10
#define	LEX_IS_ONECHAR_QUOTE		11
#ifdef TC_V850
#define LEX_IS_DOUBLEDASH_1ST		12
#endif
#ifdef TC_M32R
#define DOUBLEBAR_PARALLEL
#endif
#ifdef DOUBLEBAR_PARALLEL
#define LEX_IS_DOUBLEBAR_1ST		13
#endif
#define LEX_IS_PARALLEL_SEPARATOR	14
#ifdef H_TICK_HEX
#define LEX_IS_H			15
#endif

/* Classification predicates.  The argument is used directly as an
   index into lex[], so it must be in the range 0..255; in particular
   callers must test for EOF before using any of these.  */
#define IS_SYMBOL_COMPONENT(c)		(lex[c] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)		(lex[c] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)		(lex[c] == LEX_IS_LINE_SEPARATOR)
#define IS_PARALLEL_SEPARATOR(c)	(lex[c] == LEX_IS_PARALLEL_SEPARATOR)
#define IS_COMMENT(c)			(lex[c] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)		(lex[c] == LEX_IS_LINE_COMMENT_START)
#define	IS_NEWLINE(c)			(lex[c] == LEX_IS_NEWLINE)

static int process_escape (int);
- /* FIXME-soon: The entire lexer/parser thingy should be
- built statically at compile time rather than dynamically
- each and every time the assembler is run. xoxorich. */
- void
- do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
- {
- const char *p;
- int c;
- lex[' '] = LEX_IS_WHITESPACE;
- lex['\t'] = LEX_IS_WHITESPACE;
- lex['\r'] = LEX_IS_WHITESPACE;
- lex['\n'] = LEX_IS_NEWLINE;
- lex[':'] = LEX_IS_COLON;
- #ifdef TC_M68K
- scrub_m68k_mri = m68k_mri;
- if (! m68k_mri)
- #endif
- {
- lex['"'] = LEX_IS_STRINGQUOTE;
- #if ! defined (TC_HPPA) && ! defined (TC_I370)
- /* I370 uses single-quotes to delimit integer, float constants. */
- lex['\''] = LEX_IS_ONECHAR_QUOTE;
- #endif
- #ifdef SINGLE_QUOTE_STRINGS
- lex['\''] = LEX_IS_STRINGQUOTE;
- #endif
- }
- /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
- in state 5 of do_scrub_chars must be changed. */
- /* Note that these override the previous defaults, e.g. if ';' is a
- comment char, then it isn't a line separator. */
- for (p = symbol_chars; *p; ++p)
- lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
- for (c = 128; c < 256; ++c)
- lex[c] = LEX_IS_SYMBOL_COMPONENT;
- #ifdef tc_symbol_chars
- /* This macro permits the processor to specify all characters which
- may appears in an operand. This will prevent the scrubber from
- discarding meaningful whitespace in certain cases. The i386
- backend uses this to support prefixes, which can confuse the
- scrubber as to whether it is parsing operands or opcodes. */
- for (p = tc_symbol_chars; *p; ++p)
- lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
- #endif
- /* The m68k backend wants to be able to change comment_chars. */
- #ifndef tc_comment_chars
- #define tc_comment_chars comment_chars
- #endif
- for (p = tc_comment_chars; *p; p++)
- lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
- for (p = line_comment_chars; *p; p++)
- lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
- #ifndef tc_line_separator_chars
- #define tc_line_separator_chars line_separator_chars
- #endif
- for (p = tc_line_separator_chars; *p; p++)
- lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
- #ifdef tc_parallel_separator_chars
- /* This macro permits the processor to specify all characters which
- separate parallel insns on the same line. */
- for (p = tc_parallel_separator_chars; *p; p++)
- lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
- #endif
- /* Only allow slash-star comments if slash is not in use.
- FIXME: This isn't right. We should always permit them. */
- if (lex['/'] == 0)
- lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
- #ifdef TC_M68K
- if (m68k_mri)
- {
- lex['\''] = LEX_IS_STRINGQUOTE;
- lex[';'] = LEX_IS_COMMENT_START;
- lex['*'] = LEX_IS_LINE_COMMENT_START;
- /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
- then it can't be used in an expression. */
- lex['!'] = LEX_IS_LINE_COMMENT_START;
- }
- #endif
- #ifdef TC_V850
- lex['-'] = LEX_IS_DOUBLEDASH_1ST;
- #endif
- #ifdef DOUBLEBAR_PARALLEL
- lex['|'] = LEX_IS_DOUBLEBAR_1ST;
- #endif
- #ifdef TC_D30V
- /* Must do this is we want VLIW instruction with "->" or "<-". */
- lex['-'] = LEX_IS_SYMBOL_COMPONENT;
- #endif
- #ifdef H_TICK_HEX
- if (enable_h_tick_hex)
- {
- lex['h'] = LEX_IS_H;
- lex['H'] = LEX_IS_H;
- }
- #endif
- }
/* Saved state of the scrubber.  These persist between calls to
   do_scrub_chars, and are what app_push and app_pop save and
   restore.  */

/* Current state of the do_scrub_chars state machine.  */
static int state;
/* State to return to once the current temporary state is done.  */
static int old_state;
/* Pending text being emitted one character at a time (state -1).  */
static char *out_string;
/* Scratch buffer which out_string may point into.  */
static char out_buf[20];
/* Count of newlines swallowed inside comments or strings that are
   still to be emitted.  */
static int add_newlines;
/* Unconsumed input carried over to the next call, or NULL.  */
static char *saved_input;
/* Number of bytes at saved_input; only meaningful when it is not
   NULL.  */
static size_t saved_input_len;
/* Buffer which the GET callback of do_scrub_chars fills.  */
static char input_buffer[32 * 1024];
/* Progress through mri_pseudo while matching it against the input;
   NULL when no match is in progress.  */
static const char *mri_state;
/* Last character accepted while matching mri_pseudo.  */
static char mri_last_ch;
- /* Data structure for saving the state of app across #include's. Note that
- app is called asynchronously to the parsing of the .include's, so our
- state at the time .include is interpreted is completely unrelated.
- That's why we have to save it all. */
- struct app_save
- {
- int state;
- int old_state;
- char * out_string;
- char out_buf[sizeof (out_buf)];
- int add_newlines;
- char * saved_input;
- size_t saved_input_len;
- #ifdef TC_M68K
- int scrub_m68k_mri;
- #endif
- const char * mri_state;
- char mri_last_ch;
- #if defined TC_ARM && defined OBJ_ELF
- const char * symver_state;
- #endif
- };
- char *
- app_push (void)
- {
- struct app_save *saved;
- saved = (struct app_save *) xmalloc (sizeof (*saved));
- saved->state = state;
- saved->old_state = old_state;
- saved->out_string = out_string;
- memcpy (saved->out_buf, out_buf, sizeof (out_buf));
- saved->add_newlines = add_newlines;
- if (saved_input == NULL)
- saved->saved_input = NULL;
- else
- {
- saved->saved_input = (char *) xmalloc (saved_input_len);
- memcpy (saved->saved_input, saved_input, saved_input_len);
- saved->saved_input_len = saved_input_len;
- }
- #ifdef TC_M68K
- saved->scrub_m68k_mri = scrub_m68k_mri;
- #endif
- saved->mri_state = mri_state;
- saved->mri_last_ch = mri_last_ch;
- #if defined TC_ARM && defined OBJ_ELF
- saved->symver_state = symver_state;
- #endif
- /* do_scrub_begin() is not useful, just wastes time. */
- state = 0;
- saved_input = NULL;
- add_newlines = 0;
- return (char *) saved;
- }
- void
- app_pop (char *arg)
- {
- struct app_save *saved = (struct app_save *) arg;
- /* There is no do_scrub_end (). */
- state = saved->state;
- old_state = saved->old_state;
- out_string = saved->out_string;
- memcpy (out_buf, saved->out_buf, sizeof (out_buf));
- add_newlines = saved->add_newlines;
- if (saved->saved_input == NULL)
- saved_input = NULL;
- else
- {
- gas_assert (saved->saved_input_len <= sizeof (input_buffer));
- memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
- saved_input = input_buffer;
- saved_input_len = saved->saved_input_len;
- free (saved->saved_input);
- }
- #ifdef TC_M68K
- scrub_m68k_mri = saved->scrub_m68k_mri;
- #endif
- mri_state = saved->mri_state;
- mri_last_ch = saved->mri_last_ch;
- #if defined TC_ARM && defined OBJ_ELF
- symver_state = saved->symver_state;
- #endif
- free (arg);
- }
/* Translate the character CH which followed a backslash into the
   character the escape sequence stands for.  The letters b, f, n, r
   and t give the corresponding control character; the two quote
   characters, and every other character, are returned unchanged.

   @@ This assumes that \n &c are the same on host and target.  This is not
   necessarily true.  */

static int
process_escape (int ch)
{
  switch (ch)
    {
    case 'b':
      return '\b';
    case 'f':
      return '\f';
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 't':
      return '\t';
    case '\'':
      return '\'';
    case '"':
      return '\"';
    default:
      return ch;
    }
}
- /* This function is called to process input characters. The GET
- parameter is used to retrieve more input characters. GET should
- set its parameter to point to a buffer, and return the length of
- the buffer; it should return 0 at end of file. The scrubbed output
- characters are put into the buffer starting at TOSTART; the TOSTART
- buffer is TOLEN bytes in length. The function returns the number
- of scrubbed characters put into TOSTART. This will be TOLEN unless
- end of file was seen. This function is arranged as a state
- machine, and saves its state so that it may return at any point.
- This is the way the old code used to work. */
- size_t
- do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen)
- {
- char *to = tostart;
- char *toend = tostart + tolen;
- char *from;
- char *fromend;
- size_t fromlen;
- int ch, ch2 = 0;
- /* Character that started the string we're working on. */
- static char quotechar;
- /*State 0: beginning of normal line
- 1: After first whitespace on line (flush more white)
- 2: After first non-white (opcode) on line (keep 1white)
- 3: after second white on line (into operands) (flush white)
- 4: after putting out a .linefile, put out digits
- 5: parsing a string, then go to old-state
- 6: putting out \ escape in a "d string.
- 7: no longer used
- 8: no longer used
- 9: After seeing symbol char in state 3 (keep 1white after symchar)
- 10: After seeing whitespace in state 9 (keep white before symchar)
- 11: After seeing a symbol character in state 0 (eg a label definition)
- -1: output string in out_string and go to the state in old_state
- -2: flush text until a '*' '/' is seen, then go to state old_state
- #ifdef TC_V850
- 12: After seeing a dash, looking for a second dash as a start
- of comment.
- #endif
- #ifdef DOUBLEBAR_PARALLEL
- 13: After seeing a vertical bar, looking for a second
- vertical bar as a parallel expression separator.
- #endif
- #ifdef TC_PREDICATE_START_CHAR
- 14: After seeing a predicate start character at state 0, looking
- for a predicate end character as predicate.
- 15: After seeing a predicate start character at state 1, looking
- for a predicate end character as predicate.
- #endif
- #ifdef TC_Z80
- 16: After seeing an 'a' or an 'A' at the start of a symbol
- 17: After seeing an 'f' or an 'F' in state 16
- #endif
- */
- /* I added states 9 and 10 because the MIPS ECOFF assembler uses
- constructs like ``.loc 1 20''. This was turning into ``.loc
- 120''. States 9 and 10 ensure that a space is never dropped in
- between characters which could appear in an identifier. Ian
- Taylor, ian@cygnus.com.
- I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
- correctly on the PA (and any other target where colons are optional).
- Jeff Law, law@cs.utah.edu.
- I added state 13 so that something like "cmp r1, r2 || trap #1" does not
- get squashed into "cmp r1,r2||trap#1", with the all important space
- between the 'trap' and the '#1' being eliminated. nickc@cygnus.com */
- /* This macro gets the next input character. */
- #define GET() \
- (from < fromend \
- ? * (unsigned char *) (from++) \
- : (saved_input = NULL, \
- fromlen = (*get) (input_buffer, sizeof input_buffer), \
- from = input_buffer, \
- fromend = from + fromlen, \
- (fromlen == 0 \
- ? EOF \
- : * (unsigned char *) (from++))))
- /* This macro pushes a character back on the input stream. */
- #define UNGET(uch) (*--from = (uch))
- /* This macro puts a character into the output buffer. If this
- character fills the output buffer, this macro jumps to the label
- TOFULL. We use this rather ugly approach because we need to
- handle two different termination conditions: EOF on the input
- stream, and a full output buffer. It would be simpler if we
- always read in the entire input stream before processing it, but
- I don't want to make such a significant change to the assembler's
- memory usage. */
- #define PUT(pch) \
- do \
- { \
- *to++ = (pch); \
- if (to >= toend) \
- goto tofull; \
- } \
- while (0)
- if (saved_input != NULL)
- {
- from = saved_input;
- fromend = from + saved_input_len;
- }
- else
- {
- fromlen = (*get) (input_buffer, sizeof input_buffer);
- if (fromlen == 0)
- return 0;
- from = input_buffer;
- fromend = from + fromlen;
- }
- while (1)
- {
- /* The cases in this switch end with continue, in order to
- branch back to the top of this while loop and generate the
- next output character in the appropriate state. */
- switch (state)
- {
- case -1:
- ch = *out_string++;
- if (*out_string == '\0')
- {
- state = old_state;
- old_state = 3;
- }
- PUT (ch);
- continue;
- case -2:
- for (;;)
- {
- do
- {
- ch = GET ();
- if (ch == EOF)
- {
- as_warn (_("end of file in comment"));
- goto fromeof;
- }
- if (ch == '\n')
- PUT ('\n');
- }
- while (ch != '*');
- while ((ch = GET ()) == '*')
- ;
- if (ch == EOF)
- {
- as_warn (_("end of file in comment"));
- goto fromeof;
- }
- if (ch == '/')
- break;
- UNGET (ch);
- }
- state = old_state;
- UNGET (' ');
- continue;
- case 4:
- ch = GET ();
- if (ch == EOF)
- goto fromeof;
- else if (ch >= '0' && ch <= '9')
- PUT (ch);
- else
- {
- while (ch != EOF && IS_WHITESPACE (ch))
- ch = GET ();
- if (ch == '"')
- {
- quotechar = ch;
- state = 5;
- old_state = 3;
- PUT (ch);
- }
- else
- {
- while (ch != EOF && ch != '\n')
- ch = GET ();
- state = 0;
- PUT (ch);
- }
- }
- continue;
- case 5:
- /* We are going to copy everything up to a quote character,
- with special handling for a backslash. We try to
- optimize the copying in the simple case without using the
- GET and PUT macros. */
- {
- char *s;
- ptrdiff_t len;
- for (s = from; s < fromend; s++)
- {
- ch = *s;
- if (ch == '\\'
- || ch == quotechar
- || ch == '\n')
- break;
- }
- len = s - from;
- if (len > toend - to)
- len = toend - to;
- if (len > 0)
- {
- memcpy (to, from, len);
- to += len;
- from += len;
- if (to >= toend)
- goto tofull;
- }
- }
- ch = GET ();
- if (ch == EOF)
- {
- /* This buffer is here specifically so
- that the UNGET below will work. */
- static char one_char_buf[1];
- as_warn (_("end of file in string; '%c' inserted"), quotechar);
- state = old_state;
- from = fromend = one_char_buf + 1;
- fromlen = 1;
- UNGET ('\n');
- PUT (quotechar);
- }
- else if (ch == quotechar)
- {
- state = old_state;
- PUT (ch);
- }
- #ifndef NO_STRING_ESCAPES
- else if (ch == '\\')
- {
- state = 6;
- PUT (ch);
- }
- #endif
- else if (scrub_m68k_mri && ch == '\n')
- {
- /* Just quietly terminate the string. This permits lines like
- bne label loop if we haven't reach end yet. */
- state = old_state;
- UNGET (ch);
- PUT ('\'');
- }
- else
- {
- PUT (ch);
- }
- continue;
- case 6:
- state = 5;
- ch = GET ();
- switch (ch)
- {
- /* Handle strings broken across lines, by turning '\n' into
- '\\' and 'n'. */
- case '\n':
- UNGET ('n');
- add_newlines++;
- PUT ('\\');
- continue;
- case EOF:
- as_warn (_("end of file in string; '%c' inserted"), quotechar);
- PUT (quotechar);
- continue;
- case '"':
- case '\\':
- case 'b':
- case 'f':
- case 'n':
- case 'r':
- case 't':
- case 'v':
- case 'x':
- case 'X':
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- break;
- default:
- #ifdef ONLY_STANDARD_ESCAPES
- as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
- #endif
- break;
- }
- PUT (ch);
- continue;
- #ifdef DOUBLEBAR_PARALLEL
- case 13:
- ch = GET ();
- if (ch != '|')
- abort ();
- /* Reset back to state 1 and pretend that we are parsing a
- line from just after the first white space. */
- state = 1;
- PUT ('|');
- #ifdef TC_TIC6X
- /* "||^" is used for SPMASKed instructions. */
- ch = GET ();
- if (ch == EOF)
- goto fromeof;
- else if (ch == '^')
- PUT ('^');
- else
- UNGET (ch);
- #endif
- continue;
- #endif
- #ifdef TC_Z80
- case 16:
- /* We have seen an 'a' at the start of a symbol, look for an 'f'. */
- ch = GET ();
- if (ch == 'f' || ch == 'F')
- {
- state = 17;
- PUT (ch);
- }
- else
- {
- state = 9;
- break;
- }
- case 17:
- /* We have seen "af" at the start of a symbol,
- a ' here is a part of that symbol. */
- ch = GET ();
- state = 9;
- if (ch == '\'')
- /* Change to avoid warning about unclosed string. */
- PUT ('`');
- else if (ch != EOF)
- UNGET (ch);
- break;
- #endif
- }
- /* OK, we are somewhere in states 0 through 4 or 9 through 11. */
- /* flushchar: */
- ch = GET ();
- #ifdef TC_PREDICATE_START_CHAR
- if (ch == TC_PREDICATE_START_CHAR && (state == 0 || state == 1))
- {
- state += 14;
- PUT (ch);
- continue;
- }
- else if (state == 14 || state == 15)
- {
- if (ch == TC_PREDICATE_END_CHAR)
- {
- state -= 14;
- PUT (ch);
- ch = GET ();
- }
- else
- {
- PUT (ch);
- continue;
- }
- }
- #endif
- recycle:
- #if defined TC_ARM && defined OBJ_ELF
- /* We need to watch out for .symver directives. See the comment later
- in this function. */
- if (symver_state == NULL)
- {
- if ((state == 0 || state == 1) && ch == symver_pseudo[0])
- symver_state = symver_pseudo + 1;
- }
- else
- {
- /* We advance to the next state if we find the right
- character. */
- if (ch != '\0' && (*symver_state == ch))
- ++symver_state;
- else if (*symver_state != '\0')
- /* We did not get the expected character, or we didn't
- get a valid terminating character after seeing the
- entire pseudo-op, so we must go back to the beginning. */
- symver_state = NULL;
- else
- {
- /* We've read the entire pseudo-op. If this is the end
- of the line, go back to the beginning. */
- if (IS_NEWLINE (ch))
- symver_state = NULL;
- }
- }
- #endif /* TC_ARM && OBJ_ELF */
- #ifdef TC_M68K
- /* We want to have pseudo-ops which control whether we are in
- MRI mode or not. Unfortunately, since m68k MRI mode affects
- the scrubber, that means that we need a special purpose
- recognizer here. */
- if (mri_state == NULL)
- {
- if ((state == 0 || state == 1)
- && ch == mri_pseudo[0])
- mri_state = mri_pseudo + 1;
- }
- else
- {
- /* We advance to the next state if we find the right
- character, or if we need a space character and we get any
- whitespace character, or if we need a '0' and we get a
- '1' (this is so that we only need one state to handle
- ``.mri 0'' and ``.mri 1''). */
- if (ch != '\0'
- && (*mri_state == ch
- || (*mri_state == ' '
- && lex[ch] == LEX_IS_WHITESPACE)
- || (*mri_state == '0'
- && ch == '1')))
- {
- mri_last_ch = ch;
- ++mri_state;
- }
- else if (*mri_state != '\0'
- || (lex[ch] != LEX_IS_WHITESPACE
- && lex[ch] != LEX_IS_NEWLINE))
- {
- /* We did not get the expected character, or we didn't
- get a valid terminating character after seeing the
- entire pseudo-op, so we must go back to the
- beginning. */
- mri_state = NULL;
- }
- else
- {
- /* We've read the entire pseudo-op. mips_last_ch is
- either '0' or '1' indicating whether to enter or
- leave MRI mode. */
- do_scrub_begin (mri_last_ch == '1');
- mri_state = NULL;
- /* We continue handling the character as usual. The
- main gas reader must also handle the .mri pseudo-op
- to control expression parsing and the like. */
- }
- }
- #endif
- if (ch == EOF)
- {
- if (state != 0)
- {
- as_warn (_("end of file not at end of a line; newline inserted"));
- state = 0;
- PUT ('\n');
- }
- goto fromeof;
- }
- switch (lex[ch])
- {
- case LEX_IS_WHITESPACE:
- do
- {
- ch = GET ();
- }
- while (ch != EOF && IS_WHITESPACE (ch));
- if (ch == EOF)
- goto fromeof;
- if (state == 0)
- {
- /* Preserve a single whitespace character at the
- beginning of a line. */
- state = 1;
- UNGET (ch);
- PUT (' ');
- break;
- }
- #ifdef KEEP_WHITE_AROUND_COLON
- if (lex[ch] == LEX_IS_COLON)
- {
- /* Only keep this white if there's no white *after* the
- colon. */
- ch2 = GET ();
- if (ch2 != EOF)
- UNGET (ch2);
- if (!IS_WHITESPACE (ch2))
- {
- state = 9;
- UNGET (ch);
- PUT (' ');
- break;
- }
- }
- #endif
- if (IS_COMMENT (ch)
- || ch == '/'
- || IS_LINE_SEPARATOR (ch)
- || IS_PARALLEL_SEPARATOR (ch))
- {
- if (scrub_m68k_mri)
- {
- /* In MRI mode, we keep these spaces. */
- UNGET (ch);
- PUT (' ');
- break;
- }
- goto recycle;
- }
- /* If we're in state 2 or 11, we've seen a non-white
- character followed by whitespace. If the next character
- is ':', this is whitespace after a label name which we
- normally must ignore. In MRI mode, though, spaces are
- not permitted between the label and the colon. */
- if ((state == 2 || state == 11)
- && lex[ch] == LEX_IS_COLON
- && ! scrub_m68k_mri)
- {
- state = 1;
- PUT (ch);
- break;
- }
- switch (state)
- {
- case 1:
- /* We can arrive here if we leave a leading whitespace
- character at the beginning of a line. */
- goto recycle;
- case 2:
- state = 3;
- if (to + 1 < toend)
- {
- /* Optimize common case by skipping UNGET/GET. */
- PUT (' '); /* Sp after opco */
- goto recycle;
- }
- UNGET (ch);
- PUT (' ');
- break;
- case 3:
- #ifndef TC_KEEP_OPERAND_SPACES
- /* For TI C6X, we keep these spaces as they may separate
- functional unit specifiers from operands. */
- if (scrub_m68k_mri)
- #endif
- {
- /* In MRI mode, we keep these spaces. */
- UNGET (ch);
- PUT (' ');
- break;
- }
- goto recycle; /* Sp in operands */
- case 9:
- case 10:
- #ifndef TC_KEEP_OPERAND_SPACES
- if (scrub_m68k_mri)
- #endif
- {
- /* In MRI mode, we keep these spaces. */
- state = 3;
- UNGET (ch);
- PUT (' ');
- break;
- }
- state = 10; /* Sp after symbol char */
- goto recycle;
- case 11:
- if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
- state = 1;
- else
- {
- /* We know that ch is not ':', since we tested that
- case above. Therefore this is not a label, so it
- must be the opcode, and we've just seen the
- whitespace after it. */
- state = 3;
- }
- UNGET (ch);
- PUT (' '); /* Sp after label definition. */
- break;
- default:
- BAD_CASE (state);
- }
- break;
- case LEX_IS_TWOCHAR_COMMENT_1ST:
- ch2 = GET ();
- if (ch2 == '*')
- {
- for (;;)
- {
- do
- {
- ch2 = GET ();
- if (ch2 != EOF && IS_NEWLINE (ch2))
- add_newlines++;
- }
- while (ch2 != EOF && ch2 != '*');
- while (ch2 == '*')
- ch2 = GET ();
- if (ch2 == EOF || ch2 == '/')
- break;
- /* This UNGET will ensure that we count newlines
- correctly. */
- UNGET (ch2);
- }
- if (ch2 == EOF)
- as_warn (_("end of file in multiline comment"));
- ch = ' ';
- goto recycle;
- }
- #ifdef DOUBLESLASH_LINE_COMMENTS
- else if (ch2 == '/')
- {
- do
- {
- ch = GET ();
- }
- while (ch != EOF && !IS_NEWLINE (ch));
- if (ch == EOF)
- as_warn ("end of file in comment; newline inserted");
- state = 0;
- PUT ('\n');
- break;
- }
- #endif
- else
- {
- if (ch2 != EOF)
- UNGET (ch2);
- if (state == 9 || state == 10)
- state = 3;
- PUT (ch);
- }
- break;
- case LEX_IS_STRINGQUOTE:
- quotechar = ch;
- if (state == 10)
- {
- /* Preserve the whitespace in foo "bar". */
- UNGET (ch);
- state = 3;
- PUT (' ');
- /* PUT didn't jump out. We could just break, but we
- know what will happen, so optimize a bit. */
- ch = GET ();
- old_state = 3;
- }
- else if (state == 9)
- old_state = 3;
- else
- old_state = state;
- state = 5;
- PUT (ch);
- break;
- #ifndef IEEE_STYLE
- case LEX_IS_ONECHAR_QUOTE:
- #ifdef H_TICK_HEX
- if (state == 9 && enable_h_tick_hex)
- {
- char c;
- c = GET ();
- as_warn ("'%c found after symbol", c);
- UNGET (c);
- }
- #endif
- if (state == 10)
- {
- /* Preserve the whitespace in foo 'b'. */
- UNGET (ch);
- state = 3;
- PUT (' ');
- break;
- }
- ch = GET ();
- if (ch == EOF)
- {
- as_warn (_("end of file after a one-character quote; \\0 inserted"));
- ch = 0;
- }
- if (ch == '\\')
- {
- ch = GET ();
- if (ch == EOF)
- {
- as_warn (_("end of file in escape character"));
- ch = '\\';
- }
- else
- ch = process_escape (ch);
- }
- sprintf (out_buf, "%d", (int) (unsigned char) ch);
- /* None of these 'x constants for us. We want 'x'. */
- if ((ch = GET ()) != '\'')
- {
- #ifdef REQUIRE_CHAR_CLOSE_QUOTE
- as_warn (_("missing close quote; (assumed)"));
- #else
- if (ch != EOF)
- UNGET (ch);
- #endif
- }
- if (strlen (out_buf) == 1)
- {
- PUT (out_buf[0]);
- break;
- }
- if (state == 9)
- old_state = 3;
- else
- old_state = state;
- state = -1;
- out_string = out_buf;
- PUT (*out_string++);
- break;
- #endif
- case LEX_IS_COLON:
- #ifdef KEEP_WHITE_AROUND_COLON
- state = 9;
- #else
- if (state == 9 || state == 10)
- state = 3;
- else if (state != 3)
- state = 1;
- #endif
- PUT (ch);
- break;
- case LEX_IS_NEWLINE:
- /* Roll out a bunch of newlines from inside comments, etc. */
- if (add_newlines)
- {
- --add_newlines;
- UNGET (ch);
- }
- /* Fall through. */
- case LEX_IS_LINE_SEPARATOR:
- state = 0;
- PUT (ch);
- break;
- case LEX_IS_PARALLEL_SEPARATOR:
- state = 1;
- PUT (ch);
- break;
- #ifdef TC_V850
- case LEX_IS_DOUBLEDASH_1ST:
- ch2 = GET ();
- if (ch2 != '-')
- {
- if (ch2 != EOF)
- UNGET (ch2);
- goto de_fault;
- }
- /* Read and skip to end of line. */
- do
- {
- ch = GET ();
- }
- while (ch != EOF && ch != '\n');
- if (ch == EOF)
- as_warn (_("end of file in comment; newline inserted"));
- state = 0;
- PUT ('\n');
- break;
- #endif
- #ifdef DOUBLEBAR_PARALLEL
- case LEX_IS_DOUBLEBAR_1ST:
- ch2 = GET ();
- if (ch2 != EOF)
- UNGET (ch2);
- if (ch2 != '|')
- goto de_fault;
- /* Handle '||' in two states as invoking PUT twice might
- result in the first one jumping out of this loop. We'd
- then lose track of the state and one '|' char. */
- state = 13;
- PUT ('|');
- break;
- #endif
- case LEX_IS_LINE_COMMENT_START:
- /* FIXME-someday: The two character comment stuff was badly
- thought out. On i386, we want '/' as line comment start
- AND we want C style comments. hence this hack. The
- whole lexical process should be reworked. xoxorich. */
- if (ch == '/')
- {
- ch2 = GET ();
- if (ch2 == '*')
- {
- old_state = 3;
- state = -2;
- break;
- }
- else
- {
- UNGET (ch2);
- }
- }
- if (state == 0 || state == 1) /* Only comment at start of line. */
- {
- int startch;
- startch = ch;
- do
- {
- ch = GET ();
- }
- while (ch != EOF && IS_WHITESPACE (ch));
- if (ch == EOF)
- {
- as_warn (_("end of file in comment; newline inserted"));
- PUT ('\n');
- break;
- }
- if (ch < '0' || ch > '9' || state != 0 || startch != '#')
- {
- /* Not a cpp line. */
- while (ch != EOF && !IS_NEWLINE (ch))
- ch = GET ();
- if (ch == EOF)
- {
- as_warn (_("end of file in comment; newline inserted"));
- PUT ('\n');
- }
- else /* IS_NEWLINE (ch) */
- {
- /* To process non-zero add_newlines. */
- UNGET (ch);
- }
- state = 0;
- break;
- }
- /* Looks like `# 123 "filename"' from cpp. */
- UNGET (ch);
- old_state = 4;
- state = -1;
- if (scrub_m68k_mri)
- out_string = "\tlinefile ";
- else
- out_string = "\t.linefile ";
- PUT (*out_string++);
- break;
- }
- #ifdef TC_D10V
- /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
- Trap is the only short insn that has a first operand that is
- neither register nor label.
- We must prevent exef0f ||trap #1 from degenerating to exef0f ||trap#1.
- We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
- already LEX_IS_LINE_COMMENT_START. However, it is the
- only character in line_comment_chars for d10v, hence we
- can recognize it as such. */
- /* An alternative approach would be to reset the state to 1 when
- we see '||', '<-' or '->', but that seems to be overkill. */
- if (state == 10)
- PUT (' ');
- #endif
- /* We have a line comment character which is not at the
- start of a line. If this is also a normal comment
- character, fall through. Otherwise treat it as a default
- character. */
- if (strchr (tc_comment_chars, ch) == NULL
- && (! scrub_m68k_mri
- || (ch != '!' && ch != '*')))
- goto de_fault;
- if (scrub_m68k_mri
- && (ch == '!' || ch == '*' || ch == '#')
- && state != 1
- && state != 10)
- goto de_fault;
- /* Fall through. */
- case LEX_IS_COMMENT_START:
- #if defined TC_ARM && defined OBJ_ELF
- /* On the ARM, `@' is the comment character.
- Unfortunately this is also a special character in ELF .symver
- directives (and .type, though we deal with those another way).
- So we check if this line is such a directive, and treat
- the character as default if so. This is a hack. */
- if ((symver_state != NULL) && (*symver_state == 0))
- goto de_fault;
- #endif
- #ifdef TC_ARM
- /* For the ARM, care is needed not to damage occurrences of \@
- by stripping the @ onwards. Yuck. */
- if (to > tostart && *(to - 1) == '\\')
- /* Do not treat the @ as a start-of-comment. */
- goto de_fault;
- #endif
- #ifdef WARN_COMMENTS
- if (!found_comment)
- as_where (&found_comment_file, &found_comment);
- #endif
- do
- {
- ch = GET ();
- }
- while (ch != EOF && !IS_NEWLINE (ch));
- if (ch == EOF)
- as_warn (_("end of file in comment; newline inserted"));
- state = 0;
- PUT ('\n');
- break;
- #ifdef H_TICK_HEX
- case LEX_IS_H:
- /* Look for strings like H'[0-9A-Fa-f] and if found, replace
- the H' with 0x to make them gas-style hex characters. */
- if (enable_h_tick_hex)
- {
- char quot;
- quot = GET ();
- if (quot == '\'')
- {
- UNGET ('x');
- ch = '0';
- }
- else
- UNGET (quot);
- }
- /* FALL THROUGH */
- #endif
- case LEX_IS_SYMBOL_COMPONENT:
- if (state == 10)
- {
- /* This is a symbol character following another symbol
- character, with whitespace in between. We skipped
- the whitespace earlier, so output it now. */
- UNGET (ch);
- state = 3;
- PUT (' ');
- break;
- }
- #ifdef TC_Z80
- /* "af'" is a symbol containing '\''. */
- if (state == 3 && (ch == 'a' || ch == 'A'))
- {
- state = 16;
- PUT (ch);
- ch = GET ();
- if (ch == 'f' || ch == 'F')
- {
- state = 17;
- PUT (ch);
- break;
- }
- else
- {
- state = 9;
- if (ch == EOF || !IS_SYMBOL_COMPONENT (ch))
- {
- if (ch != EOF)
- UNGET (ch);
- break;
- }
- }
- }
- #endif
- if (state == 3)
- state = 9;
- /* This is a common case. Quickly copy CH and all the
- following symbol component or normal characters. */
- if (to + 1 < toend
- && mri_state == NULL
- #if defined TC_ARM && defined OBJ_ELF
- && symver_state == NULL
- #endif
- )
- {
- char *s;
- ptrdiff_t len;
- for (s = from; s < fromend; s++)
- {
- int type;
- ch2 = *(unsigned char *) s;
- type = lex[ch2];
- if (type != 0
- && type != LEX_IS_SYMBOL_COMPONENT)
- break;
- }
- if (s > from)
- /* Handle the last character normally, for
- simplicity. */
- --s;
- len = s - from;
- if (len > (toend - to) - 1)
- len = (toend - to) - 1;
- if (len > 0)
- {
- PUT (ch);
- memcpy (to, from, len);
- to += len;
- from += len;
- if (to >= toend)
- goto tofull;
- ch = GET ();
- }
- }
- /* Fall through. */
- default:
- de_fault:
- /* Some relatively `normal' character. */
- if (state == 0)
- {
- state = 11; /* Now seeing label definition. */
- }
- else if (state == 1)
- {
- state = 2; /* Ditto. */
- }
- else if (state == 9)
- {
- if (!IS_SYMBOL_COMPONENT (ch))
- state = 3;
- }
- else if (state == 10)
- {
- if (ch == '\\')
- {
- /* Special handling for backslash: a backslash may
- be the beginning of a formal parameter (of a
- macro) following another symbol character, with
- whitespace in between. If that is the case, we
- output a space before the parameter. Strictly
- speaking, correct handling depends upon what the
- macro parameter expands into; if the parameter
- expands into something which does not start with
- an operand character, then we don't want to keep
- the space. We don't have enough information to
- make the right choice, so here we are making the
- choice which is more likely to be correct. */
- if (to + 1 >= toend)
- {
- /* If we're near the end of the buffer, save the
- character for the next time round. Otherwise
- we'll lose our state. */
- UNGET (ch);
- goto tofull;
- }
- *to++ = ' ';
- }
- state = 3;
- }
- PUT (ch);
- break;
- }
- }
- /*NOTREACHED*/
- fromeof:
- /* We have reached the end of the input. */
- return to - tostart;
- tofull:
- /* The output buffer is full. Save any input we have not yet
- processed. */
- if (fromend > from)
- {
- saved_input = from;
- saved_input_len = fromend - from;
- }
- else
- saved_input = NULL;
- return to - tostart;
- }
|