123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554 |
- %
- % TOKEN-SCANNER.RED - Table-driven token scanner
- %
- % Author: Eric Benson
- % Symbolic Computation Group
- % Computer Science Dept.
- % University of Utah
- % Date: 27 August 1981
- % Copyright (c) 1981 University of Utah
- %
- % Edit by Cris Perdue, 29 Jan 1983 1338-PST
- % Occurrences of "dipthong" changed to "diphthong"
- % <PSL.KERNEL>TOKEN-SCANNER.RED.2, 16-Dec-82 14:55:55, Edit by BENSON
- % MakeBufIntoFloat uses floating point arithmetic on each digit
- % <PSL.INTERP>TOKEN-SCANNER.RED.6, 15-Sep-82 10:49:54, Edit by BENSON
- % Can now scan 1+ and 1-
- % <PSL.INTERP>TOKEN-SCANNER.RED.12, 10-Jan-82 21:53:28, Edit by BENSON
- % Fixed bug in floating point parsing
- % <PSL.INTERP>TOKEN-SCANNER.RED.9, 8-Jan-82 07:06:23, Edit by GRISS
- % MakeBufIntoLispInteger becomes procedure for BigNums
- % <PSL.INTERP>TOKEN-SCANNER.RED.7, 28-Dec-81 22:09:14, Edit by BENSON
- % Made dipthong indicator last element of scan table
- fluid '(CurrentScanTable!* !*Raise !*Compressing !*EOLInStringOK);
- LoadTime <<
- !*Raise := T;
- !*Compressing := NIL;
- !*EOLInStringOK := NIL;
- >>;
- CompileTime flag('(ReadInBuf MakeBufIntoID MakeBufIntoString
- MakeBufIntoLispInteger MakeBufIntoSysNumber
- MakeBufIntoFloat MakeStringIntoSysInteger
- MakeStringIntoBitString ScannerError SysPowerOf2P
- ScanPossibleDiphthong),
- 'InternalFunction);
- on SysLisp;
- % DIGITS are 0..9
- internal WConst LETTER = 10,
- DELIMITER = 11,
- COMMENTCHAR = 12,
- DIPHTHONGSTART = 13,
- IDESCAPECHAR = 14,
- STRINGQUOTE = 15,
- PACKAGEINDICATOR = 16,
- IGNORE = 17,
- MINUSSIGN = 18,
- PLUSSIGN = 19,
- DECIMALPOINT = 20,
- IDSURROUND = 21;
- internal WVar TokCh,
- TokChannel,
- ChTokenType,
- CurrentChar,
- ChangedPackages,
- TokRadix,
- TokSign,
- TokFloatFractionLength,
- TokFloatExponentSign,
- TokFloatExponent;
- CompileTime <<
- syslsp smacro procedure TokenTypeOfChar Ch;
- IntInf VecItm(VecInf LispVar CurrentScanTable!*, Ch);
- syslsp smacro procedure CurrentDiphthongIndicator();
- VecItm(VecInf LispVar CurrentScanTable!*, 128);
- syslsp smacro procedure ResetBuf();
- CurrentChar := 0;
- syslsp smacro procedure BackupBuf();
- CurrentChar := CurrentChar - 1;
- >>;
- syslsp procedure ReadInBuf();
- << TokCh := ChannelReadChar TokChannel;
- StrByt(TokenBuffer, CurrentChar) := TokCh;
- ChTokenType := TokenTypeOfChar TokCh;
- if CurrentChar < MaxTokenSize then
- CurrentChar := CurrentChar + 1
- else if CurrentChar = MaxTokenSize then
- << ErrorPrintF("***** READ Buffer overflow, Truncating");
- CurrentChar := MaxTokenSize + 1 >>
- else CurrentChar := MaxTokenSize + 1 >>;
- CompileTime <<
- syslsp smacro procedure UnReadLastChar();
- ChannelUnReadChar(Channel, TokCh);
- syslsp smacro procedure LowerCaseChar Ch;
- Ch >= char !a and Ch <= char !z;
- syslsp smacro procedure RaiseChar Ch;
- (Ch - char !a) + char A;
- syslsp smacro procedure RaiseLastChar();
- if LowerCaseChar TokCh then
- StrByt(TokenBuffer, CurrentChar - 1) := RaiseChar TokCh;
- >>;
- syslsp procedure MakeBufIntoID();
- << LispVar TokType!* := '0;
- if CurrentChar eq 1 then MkID StrByt(TokenBuffer, 0)
- else
- << StrByt(TokenBuffer, CurrentChar) := char NULL;
- TokenBuffer[0] := CurrentChar - 1;
- if LispVar !*Compressing then NewID CopyString TokenBuffer
- else Intern MkSTR TokenBuffer >> >>;
- syslsp procedure MakeBufIntoString();
- << LispVar TokType!* := '1;
- StrByt(TokenBuffer, CurrentChar) := 0;
- TokenBuffer[0] := CurrentChar - 1;
- CopyString TokenBuffer >>;
- syslsp procedure MakeBufIntoSysNumber(Radix, Sign);
- << StrByt(TokenBuffer, CurrentChar) := 0;
- TokenBuffer[0] := CurrentChar - 1;
- MakeStringIntoSysInteger(TokenBuffer, Radix, Sign) >>;
- syslsp procedure MakeBufIntoLispInteger(Radix, Sign);
- << LispVar TokType!* := '2;
- StrByt(TokenBuffer, CurrentChar) := 0;
- TokenBuffer[0] := CurrentChar - 1;
- MakeStringIntoLispInteger(MkSTR TokenBuffer, Radix, Sign) >>;
- internal WArray MakeFloatTemp1[1],
- MakeFloatTemp2[1],
- FloatTen[1];
- % Changed to use floating point arithmetic on the characters, rather
- % than converting to an integer. This avoids overflow problems.
- syslsp procedure MakeBufIntoFloat Exponent;
- begin scalar F, N;
- !*WFloat(FloatTen, 10);
- !*WFloat(MakeFloatTemp1, 0);
- N := CurrentChar - 1;
- for I := 0 step 1 until N do
- << !*WFloat(MakeFloatTemp2, DigitToNumber StrByt(TokenBuffer, I));
- !*FTimes2(MakeFloatTemp1, MakeFloatTemp1, FloatTen);
- !*FPlus2(MakeFloatTemp1, MakeFloatTemp1, MakeFloatTemp2) >>;
- if Exponent > 0 then
- for I := 1 step 1 until Exponent do
- !*FTimes2(MakeFloatTemp1, MakeFloatTemp1, FloatTen)
- else if Exponent < 0 then
- << Exponent := -Exponent;
- for I := 1 step 1 until Exponent do
- !*FQuotient(MakeFloatTemp1, MakeFloatTemp1, FloatTen) >>;
- LispVar TokType!* := '2;
- F := GtFLTN();
- !*FAssign(FloatBase F, MakeFloatTemp1);
- return MkFLTN F;
- end;
- syslsp procedure ChannelReadToken Channel; %. Token scanner
- %
- % This is the basic Lisp token scanner. The value returned is a Lisp
- % item corresponding to the next token from the input stream. IDs will
- % be interned. The global Lisp variable TokType!* will be set to
- % 0 if the token is an ordinary ID,
- % 1 if the token is a string (delimited by double quotes),
- % 2 if the token is a number, or
- % 3 if the token is an unescaped delimiter.
- % In the last case, the value returned by this function will be the single
- % character ID corresponding to the delimiter.
- %
- begin
- TokChannel := Channel;
- ChangedPackages := 0;
- ResetBuf();
- StartScanning:
- TokCh := ChannelReadChar Channel;
- ChTokenType := TokenTypeOfChar TokCh;
- if ChTokenType eq IGNORE then goto StartScanning;
- StrByt(TokenBuffer, CurrentChar) := TokCh;
- CurrentChar := CurrentChar + 1;
- case ChTokenType of
- 0 to 9: % digit
- << TokSign := 1;
- goto InsideNumber >>;
- 10: % Start of ID
- << if null LispVar !*Raise then
- goto InsideID
- else
- << RaiseLastChar();
- goto InsideRaisedID >> >>;
- 11: % Delimiter, but not beginning of Diphthong
- << LispVar TokType!* := '3;
- return MkID TokCh >>;
- 12: % Start of comment
- goto InsideComment;
- 13: % Diphthong start - Lisp function uses P-list of starting char
- return ScanPossibleDiphthong(TokChannel, MkID TokCh);
- 14: % ID escape character
- << if null LispVar !*Raise then
- goto GotEscape
- else goto GotEscapeInRaisedID >>;
- 15: % string quote
- << BackupBuf();
- goto InsideString >>;
- 16: % Package indicator - at start of token means use global package
- << ResetBuf();
- ChangedPackages := 1;
- Package 'Global;
- if null LispVar !*Raise then
- goto GotPackageMustGetID
- else goto GotPackageMustGetIDRaised >>;
- 17: % Ignore - can't ever happen
- ScannerError("Internal error - consult a wizard");
- 18: % Minus sign
- << TokSign := -1;
- goto GotSign >>;
- 19: % Plus sign
- << TokSign := 1;
- goto GotSign >>;
- 20: % decimal point
- << ResetBuf();
- ReadInBuf();
- if ChTokenType >= 10 then
- << UnReadLastChar();
- return ScanPossibleDiphthong(TokChannel, '!.) >>
- else
- << TokSign := 1;
- TokFloatFractionLength := 1;
- goto InsideFloatFraction >> >>;
- 21: % IDSURROUND, i.e. vertical bars
- << BackupBuf();
- goto InsideIDSurround >>;
- default:
- return ScannerError("Unknown token type")
- end;
- GotEscape:
- BackupBuf();
- ReadInBuf();
- goto InsideID;
- InsideID:
- ReadInBuf();
- if ChTokenType <= 10
- or ChTokenType eq PLUSSIGN
- or ChTokenType eq MINUSSIGN then goto InsideID
- else if ChTokenType eq IDESCAPECHAR then goto GotEscape
- else if ChTokenType eq PACKAGEINDICATOR then
- << BackupBuf();
- ChangedPackages := 1;
- Package MakeBufIntoID();
- ResetBuf();
- goto GotPackageMustGetID >>
- else
- << UnReadLastChar();
- BackupBuf();
- if ChangedPackages neq 0 then Package LispVar CurrentPackage!*;
- return MakeBufIntoID() >>;
- GotPackageMustGetID:
- ReadInBuf();
- if ChTokenType eq LETTER then goto InsideID
- else if ChTokenType eq IDESCAPECHAR then goto GotEscape
- else ScannerError("Illegal to follow package indicator with non ID");
- GotEscapeInRaisedID:
- BackupBuf();
- ReadInBuf();
- goto InsideRaisedID;
- InsideRaisedID:
- ReadInBuf();
- if ChTokenType < 10
- or ChTokenType eq PLUSSIGN
- or ChTokenType eq MINUSSIGN then goto InsideRaisedID
- else if ChTokenType eq 10 then
- << RaiseLastChar();
- goto InsideRaisedID >>
- else if ChTokenType eq IDESCAPECHAR then goto GotEscapeInRaisedID
- else if ChTokenType eq PACKAGEINDICATOR then
- << BackupBuf();
- ChangedPackages := 1;
- Package MakeBufIntoID();
- ResetBuf();
- goto GotPackageMustGetIDRaised >>
- else
- << UnReadLastChar();
- BackupBuf();
- if ChangedPackages neq 0 then Package LispVar CurrentPackage!*;
- return MakeBufIntoID() >>;
- GotPackageMustGetIDRaised:
- ReadInBuf();
- if ChTokenType eq LETTER then
- << RaiseLastChar();
- goto InsideRaisedID >>
- else if ChTokenType eq IDESCAPECHAR then goto GotEscapeInRaisedID
- else ScannerError("Illegal to follow package indicator with non ID");
- InsideString:
- ReadInBuf();
- if ChTokenType eq STRINGQUOTE then
- << BackupBuf();
- ReadInBuf();
- if ChTokenType eq STRINGQUOTE then goto InsideString
- else
- << UnReadLastChar();
- BackupBuf();
- return MakeBufIntoString() >> >>
- else if TokCh eq char EOL and not LispVar !*EOLInStringOK then
- ErrorPrintF("*** String continued over end-of-line")
- else if TokCh eq char EOF then
- ScannerError("EOF encountered inside a string");
- goto InsideString;
- InsideIDSurround:
- ReadInBuf();
- if ChTokenType eq IDSURROUND then
- << BackupBuf();
- return MakeBufIntoID() >>
- else if ChTokenType eq IDESCAPECHAR then
- << BackupBuf();
- ReadInBuf() >>
- else if TokCh eq char EOF then
- ScannerError("EOF encountered inside an ID");
- goto InsideIDSurround;
- GotSign:
- ResetBuf();
- ReadInBuf();
- if TokCh eq char !. then
- << PutStrByt(TokenBuffer, 0, char !0);
- CurrentChar := 2;
- goto InsideFloat >>
- else if ChTokenType eq LETTER % patch to be able to read 1+ and 1-
- or ChTokenType eq MINUSSIGN
- or ChTokenType eq PLUSSIGN then
- << ResetBuf();
- StrByt(TokenBuffer, 0) := if TokSign < 0 then char !- else char !+;
- StrByt(TokenBuffer, 1) := TokCh;
- CurrentChar := 2;
- if LispVar !*Raise then
- << RaiseLastChar();
- goto InsideRaisedID >>
- else goto InsideID >>
- else if ChTokenType eq IDESCAPECHAR then
- << ResetBuf();
- StrByt(TokenBuffer, 0) := if TokSign < 0 then char !- else char !+;
- CurrentChar := 1;
- if LispVar !*Raise then
- goto GotEscapeInRaisedID
- else goto GotEscape >>
- else if ChTokenType > 9 then
- << UnReadLastChar(); % Allow + or - to start a Diphthong
- return ScanPossibleDiphthong(Channel,
- MkID(if TokSign < 0 then char !-
- else char !+)) >>
- else goto InsideNumber;
- InsideNumber:
- ReadInBuf();
- if ChTokenType < 10 then goto InsideNumber;
- if TokCh eq char !# then
- << BackupBuf();
- TokRadix := MakeBufIntoSysNumber(10, 1);
- ResetBuf();
- if TokRadix < 2 or TokRadix > 36 then
- return ScannerError("Radix out of range");
- if TokRadix <= 10 then goto InsideIntegerRadixUnder10
- else goto InsideIntegerRadixOver10 >>
- else if TokCh eq char !. then goto InsideFloat
- else if TokCh eq char B or TokCh eq char !b then
- << BackupBuf();
- return MakeBufIntoLispInteger(8, TokSign) >>
- else if TokCh eq char E or TokCh eq char !e then
- << TokFloatFractionLength := 0;
- goto InsideFloatExponent >>
- else if ChTokenType eq LETTER % patch to be able to read 1+ and 1-
- or ChTokenType eq MINUSSIGN
- or ChTokenType eq PLUSSIGN then
- if LispVar !*Raise then
- << RaiseLastChar();
- goto InsideRaisedID >>
- else goto InsideID
- else if ChTokenType eq IDESCAPECHAR then
- if LispVar !*Raise then
- goto GotEscapeInRaisedID
- else goto GotEscape
- else
- << UnReadLastChar();
- BackupBuf();
- return MakeBufIntoLispInteger(10, TokSign) >>;
- InsideIntegerRadixUnder10:
- ReadInBuf();
- if ChTokenType < TokRadix then goto InsideIntegerRadixUnder10;
- if ChTokenType < 10 then return ScannerError("Digit out of range");
- NumReturn:
- UnReadLastChar();
- BackupBuf();
- return MakeBufIntoLispInteger(TokRadix, TokSign);
- InsideIntegerRadixOver10:
- ReadInBuf();
- if ChTokenType < 10 then goto InsideIntegerRadixOver10;
- if ChTokenType > 10 then goto NumReturn;
- if LowerCaseChar TokCh then
- << TokCh := RaiseChar TokCh;
- StrByt(TokenBuffer, CurrentChar - 1) := TokCh >>;
- if TokCh >= char A - 10 + TokRadix then goto NumReturn;
- goto InsideIntegerRadixOver10;
- InsideFloat: % got decimal point inside number
- BackupBuf();
- ReadInBuf();
- if TokCh eq char E or TokCh eq char !e then
- << TokFloatFractionLength := 0;
- goto InsideFloatExponent >>;
- if ChTokenType >= 10 then % nnn. is floating point number
- << UnReadLastChar();
- BackupBuf();
- return MakeBufIntoFloat 0 >>;
- TokFloatFractionLength := 1;
- InsideFloatFraction:
- ReadInBuf();
- if ChTokenType < 10 then
- << if TokFloatFractionLength < 9 then
- TokFloatFractionLength := TokFloatFractionLength + 1
- else BackupBuf(); % don't overflow mantissa
- goto InsideFloatFraction >>;
- if TokCh eq char E or TokCh eq char lower e then goto InsideFloatExponent;
- UnReadLastChar();
- BackupBuf();
- return MakeBufIntoFloat(-TokFloatFractionLength);
- InsideFloatExponent:
- BackupBuf();
- TokFloatExponentSign := 1;
- TokFloatExponent := 0;
- TokCh := ChannelReadChar TokChannel;
- ChTokenType := TokenTypeOfChar TokCh;
- if ChTokenType < 10 then
- << TokFloatExponent := ChTokenType;
- goto DigitsInsideExponent >>;
- if TokCh eq char '!- then TokFloatExponentSign := -1
- else if TokCh neq char '!+ then
- return ScannerError("Missing exponent in float");
- TokCh := ChannelReadChar TokChannel;
- ChTokenType := TokenTypeOfChar TokCh;
- if ChTokenType >= 10 then
- return ScannerError("Missing exponent in float");
- TokFloatExponent := ChTokenType;
- DigitsInsideExponent:
- TokCh := ChannelReadChar TokChannel;
- ChTokenType := TokenTypeOfChar TokCh;
- if ChTokenType < 10 then
- << TokFloatExponent := TokFloatExponent * 10 + ChTokenType;
- goto DigitsInsideExponent >>;
- ChannelUnReadChar(Channel, TokCh);
- return MakeBufIntoFloat(TokFloatExponentSign * TokFloatExponent
- - TokFloatFractionLength);
- InsideComment:
- if (TokCh := ChannelReadChar Channel) eq char EOL then
- << ResetBuf();
- goto StartScanning >>
- else if TokCh eq char EOF then return LispVar !$EOF!$
- else goto InsideComment;
- end;
- syslsp procedure RAtom(); %. Read token from current input
- ChannelReadToken LispVar IN!*;
- syslsp procedure DigitToNumber D;
- %
- % if D is not a digit then it is assumed to be an uppercase letter
- %
- if D >= char !0 and D <= char !9 then D - char !0 else D - (char A - 10);
- syslsp procedure MakeStringIntoLispInteger(S, Radix, Sign);
- Sys2Int MakeStringIntoSysInteger(S, Radix, Sign);
- syslsp procedure MakeStringIntoSysInteger(Strng, Radix, Sign);
- %
- % Unsafe string to integer conversion. Strng is assumed to contain
- % only digits and possibly uppercase letters for radices > 10. Since it
- % uses multiplication, arithmetic overflow may occur. Sign is +1 or -1
- %
- begin scalar Count, Tot, RadixExponent;
- if RadixExponent := SysPowerOf2P Radix then return
- MakeStringIntoBitString(Strng, Radix, RadixExponent, Sign);
- Strng := StrInf Strng;
- Count := StrLen Strng;
- Tot := 0;
- for I := 0 step 1 until Count do
- Tot := Tot * Radix + DigitToNumber StrByt(Strng, I);
- return if Sign < 0 then -Tot else Tot;
- end;
- syslsp procedure MakeStringIntoBitString(Strng, Radix, RadixExponent, Sign);
- begin scalar Count, Tot;
- Strng := StrInf Strng;
- Count := StrLen Strng;
- Tot := 0;
- for I := 0 step 1 until Count do
- << Tot := LSH(Tot, RadixExponent);
- Tot := LOR(Tot, DigitToNumber StrByt(Strng, I)) >>;
- if Sign < 0 then return -Tot;
- return Tot;
- end;
- syslsp procedure SysPowerOf2P Num;
- case Num of
- 1: 0;
- 2: 1;
- 4: 2;
- 8: 3;
- 16: 4;
- 32: 5;
- default: NIL
- end;
- syslsp procedure ScannerError Message;
- StdError BldMsg("***** Error in token scanner: %s", Message);
- syslsp procedure ScanPossibleDiphthong(Channel, StartChar);
- begin scalar Alst, Target, Ch;
- LispVar TokType!* := '3;
- if null (Alst := get(StartChar, CurrentDiphthongIndicator())) then
- return StartChar;
- if null (Target := Atsoc(Ch := MkID ChannelReadChar Channel, Alst)) then
- << ChannelUnReadChar(Channel, IDInf Ch);
- return StartChar >>;
- return cdr Target;
- end;
- syslsp procedure ReadLine();
- << MakeInputAvailable();
- ChannelReadLine LispVar IN!* >>;
- syslsp procedure ChannelReadLine Chn;
- begin scalar C;
- TokenBuffer[0] := -1;
- while (C := ChannelReadChar Chn) neq char EOL and C neq char EOF do
- << TokenBuffer[0] := TokenBuffer[0] + 1;
- StrByt(TokenBuffer, TokenBuffer[0]) := C >>;
- return if TokenBuffer[0] >= 0 then
- << StrByt(TokenBuffer, TokenBuffer[0] + 1) := char NULL;
- CopyString MkSTR TokenBuffer >>
- else '"";
- end;
- % Dummy definition of package conversion function
- syslsp procedure Package U;
- NIL;
- % Dummy definition of MakeInputAvailable, redefined by Emode
- syslsp procedure MakeInputAvailable();
- NIL;
- off SysLisp;
- END;
|