"AbstractScanner holds the state shared by every scanner: the source stream, a one-character lookahead, the current token and token type, and a character accumulation buffer."
Object subclass: #AbstractScanner
    instanceVariableNames: 'source nextChar token tokenType buffer '
    classVariableNames: ''
    poolDictionaries: ''
    category: 'Compilers-Scanners'!

AbstractScanner comment: '================================================= Copyright (c) 1992 by Justin O. Graver. All rights reserved (with exceptions). For complete information evaluate "Object tgenCopyright." ================================================= I scan a source string and break it up into tokens using mechanisms provided in concrete subclasses. Instance Variables: source - character input stream. nextChar - one-character lookahead buffer for source, nil if no input left. token - current token buffer. tokenType - current token type buffer. buffer - character accumulation buffer for tokens. '!

!AbstractScanner methodsFor: 'initialization'!

init
    "Install a fresh retractable write stream, backed by a 32-character string, as the token accumulation buffer."

    self buffer: (RetractableWriteStream on: (String new: 32))!

reset
    "Return to the pre-scan state: empty the buffer and clear the token, token type and lookahead character."

    self buffer reset.
    self
        token: nil;
        tokenType: nil;
        nextChar: nil!

scanSource: aString
    "Start scanning aString: reset, wrap aString in a retractable read stream, prime the one-character lookahead and scan the first token."

    self reset.
    self source: (RetractableReadStream on: aString).
    self nextChar: self source next.
    self scanToken! !

!AbstractScanner methodsFor: 'state accessing'!

buffer
    "Answer the character accumulation buffer."

    ^buffer!

buffer: aWriteStream
    "Set the character accumulation buffer."

    buffer := aWriteStream!

nextChar
    "Answer the one-character lookahead."

    ^nextChar!

nextChar: aCharacter
    "Set the one-character lookahead."

    nextChar := aCharacter!

source
    "Answer the input stream being scanned."

    ^source!

source: aReadStream
    "Set the input stream being scanned."

    source := aReadStream!

token
    "Answer the current token."

    ^token!

token: anObject
    "Set the current token."

    token := anObject!

tokenType
    "Answer the type of the current token."

    ^tokenType!

tokenType: anObject
    "Set the type of the current token."

    tokenType := anObject! !

!AbstractScanner methodsFor: 'scanning'!

backspaceSource
    "Step the source back one character, unless the scanner is at the end, and reload the lookahead from 'source current'. When the source is at the end, 'source current' is already the last character."

    self atEnd ifFalse: [self source backspace].
    self nextChar: self source current!

getNextChar
    "Append the lookahead character to the buffer, then read the next lookahead from the source. What the source answers when it is exhausted is decided by the stream class and by the subclass notion of endOfInputToken. Subclasses may override this to avoid unnecessary buffering."

    self buffer nextPut: self nextChar.
    self nextChar: self source next!

putBackChar
    "Undo one getNextChar: drop the last buffered character and backspace the source. Subclasses may override this to avoid unnecessary buffering."

    self buffer backspace.
    self backspaceSource!

scanToken
    "Subclasses must compute values for token and tokenType here."

    self subclassResponsibility!

signalEndOfInput
    "Put the scanner in the end-of-input state by storing the end-of-input token type and token."

    self
        tokenType: self endOfInputTokenType;
        token: self endOfInputToken! !

!AbstractScanner methodsFor: 'testing'!

atEnd
    "Answer whether the lookahead equals the end-of-input token."

    ^self nextChar = self endOfInputToken! !

!AbstractScanner methodsFor: 'accessing'!

contents
    "Answer the contents of the source stream."

    ^self source contents!

endOfInputToken
    "Answer a token representing the end of the input."

    self subclassResponsibility!

endOfInputTokenType
    "Answer the token type representing the end of the input."

    self subclassResponsibility!

errorPosition
    "Answer the source position of the last acceptable character: the stream position, plus one when at the end, and never less than 1."

    ^(source position + (self atEnd ifTrue: [1] ifFalse: [0])) max: 1!

position
    "Answer the current position of the source stream."

    ^self source position! !

"-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- "!

AbstractScanner class
    instanceVariableNames: ''!

!AbstractScanner class methodsFor: 'instance creation'!

new
    "Answer a new instance after sending it init."

    | scanner |
    scanner := super new.
    ^scanner init!

scanFrom: aString
    "Answer a new scanner that has been started on aString (its first token is already scanned)."

    ^self new
        scanSource: aString;
        yourself! !

"FSABasedScanner drives token recognition from a finite-state automaton held in the fsa instance variable."
AbstractScanner subclass: #FSABasedScanner
    instanceVariableNames: 'fsa '
    classVariableNames: ''
    poolDictionaries: ''
    category: 'Compilers-Scanners'!

FSABasedScanner comment: '================================================= Copyright (c) 1992 by Justin O. Graver. All rights reserved (with exceptions). For complete information evaluate "Object tgenCopyright." ================================================= I am an abstract class of scanner that scans a source string and breaks it up into tokens using a minimal deterministic finite-state automata (FSA). Each token is also given a type by its associated final state in the FSA.
Specific FSAs are stored in class instance variables of my concrete subclasses. Instance Variables: fsa - a local reference to the token recognizer, in minimal deterministic form, for this class of scanner. '!

!FSABasedScanner methodsFor: 'state accessing'!

fsa
    "Answer the token recognizer used by this scanner."

    ^fsa!

fsa: aRecognizer
    "Set the token recognizer used by this scanner."

    fsa := aRecognizer! !

!FSABasedScanner methodsFor: 'scanning directives'!

compactDoubleApostrophes
    "Replace every pair of adjacent apostrophes in the current token by a single apostrophe and store the result back as the token."

    | in out current following |
    in := ReadStream on: self token.
    out := WriteStream on: (String new: 20).
    [in atEnd]
        whileFalse:
            [current := in next.
            out nextPut: current.
            current = $'
                ifTrue:
                    ["an apostrophe directly followed by another one: drop the second"
                    following := in peek.
                    (following notNil and: [following = $'])
                        ifTrue: [in skip: 1]]].
    self token: out contents!

ignoreComment
    "Discard the token just scanned by scanning the next one."

    self scanToken!

ignoreDelimiter
    "Discard the token just scanned by scanning the next one."

    self scanToken! !

!FSABasedScanner methodsFor: 'accessing'!

endOfInputToken
    "Answer a token representing the end of the input."

    ^Character endOfInput!

endOfInputTokenType
    "Answer the token type representing the end of the input; it is the end-of-input token itself."

    ^self endOfInputToken!

myFsa
    "Answer the recognizer stored on my class."

    ^self class fsa!

startState
    "Answer the state scanning starts from; here it is the fsa itself."

    ^self fsa! !

!FSABasedScanner methodsFor: 'scanning'!

scanToken
    "Scan the next token and compute its token type. Follow transitions from the start state for as long as one exists for the lookahead character, then ask the state reached for the token type and action of the buffered text and perform the action if there is one."

    | current successor text holder |
    self atEnd
        ifTrue:
            [self signalEndOfInput.
            ^self].
    current := self startState.
    [(successor := self at: current transitionFor: self nextChar) notNil]
        whileTrue:
            [current := successor.
            self getNextChar].
    text := self buffer contents.
    holder := self at: current tokenTypeAndActionFor: text.
    self tokenType: holder type.
    self token: text.
    self buffer reset.
    holder action isNil
        ifFalse: [self perform: holder action]! !

!FSABasedScanner methodsFor: 'initialization'!

init
    "Initialize the inherited state, then take the recognizer from my class."

    super init.
    self fsa: self myFsa! !

!FSABasedScanner methodsFor: 'scanner generation'!

classInitializationMethodTextForClassNamed: name spec: tokenSpec
    "Subclasses answer the source text of a class-side initialize method for the scanner class called name built from tokenSpec."

    ^self subclassResponsibility!
createScannerClassNamed: name category: category spec: tokenSpec
    "Create and answer a new scanner class called name in category. The class is a subclass of defaultScannerClass, receives the generated class comment, gets a class-side initialization method compiled from the text produced for name and tokenSpec, and is then sent initialize."

    | scannerClass |
    scannerClass := self defaultScannerClass
                subclass: name asSymbol
                instanceVariableNames: ''
                classVariableNames: ''
                poolDictionaries: ''
                category: category.
    scannerClass comment: self generatedScannerClassComment.
    scannerClass class compile: (self classInitializationMethodTextForClassNamed: name spec: tokenSpec)
        classified: 'class initialization'.
    scannerClass initialize.
    ^scannerClass!

defaultOptimizedScannerClass
    "Answer the table-based scanner class used by fastScanner."

    ^OptimizedScanner!

defaultScannerClass
    "Answer the class that generated scanner classes are subclassed from: my own class."

    ^self class!

generatedScannerClassComment
    "Answer the class comment given to generated scanner classes."

    ^'This scanner class was automatically generated by ', TranslatorGenerator versionName , '.'!

newStreamForMethodRendering
    "Answer a write stream on a 2048-character string whose policy is told to print characters literally."

    | ws |
    ws := WriteStream on: (String new: 2048).
    ws policy printCharactersLiterally: true.
    ^ws! !

!FSABasedScanner methodsFor: 'converting'!

fastScanner
    "Answer the scanner that defaultOptimizedScannerClass builds from me."

    ^self defaultOptimizedScannerClass buildFrom: self! !

!FSABasedScanner methodsFor: 'private'!

at: state tokenTypeAndActionFor: tok
    "Answer what state answers as token type and action for the text tok. Subclasses with another state representation override this."

    ^state tokenTypeAndActionFor: tok!

at: state transitionFor: char
    "Answer what state answers as its transition for char. Subclasses with another state representation override this."

    ^state transitionFor: char! !

"-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- "!

FSABasedScanner class
    instanceVariableNames: 'fsa '!

!FSABasedScanner class methodsFor: 'state accessing'!

fsa
    "Answer the recognizer shared by instances of this class."

    ^fsa!

fsa: argument
    "Set the recognizer shared by instances of this class."

    fsa := argument! !

!FSABasedScanner class methodsFor: 'class initialization'!

initialize
    "Concrete subclasses must somehow provide a fsa.  Subclasses created by automatic means may simply 'plug-in' a dynamically computed fsa.  However, if a class that can be filed-out is desired then it is worthwhile to override this initialization method with one that can build the appropriate fsa directly."
    "FSABasedScanner initialize"

    self fsa: nil! !

"FSABasedLookaheadScanner adds savePosition, the source position remembered for error notification."
FSABasedScanner subclass: #FSABasedLookaheadScanner
    instanceVariableNames: 'savePosition '
    classVariableNames: ''
    poolDictionaries: ''
    category: 'Compilers-Scanners'!

FSABasedLookaheadScanner comment: '================================================= Copyright (c) 1992 by Justin O.
Graver. All rights reserved (with exceptions). For complete information evaluate "Object tgenCopyright." ================================================= This is an abstract class for scanners with lookahead. Instance Variables: savePosition - pointer into input source for error notification.'!

!FSABasedLookaheadScanner methodsFor: 'initialization'!

reset
    "Reset the initial state of the scanner before scanning a new source."

    super reset.
    self savePosition: 0! !

!FSABasedLookaheadScanner methodsFor: 'state accessing'!

savePosition
    "Answer the source position saved for error notification."

    ^savePosition!

savePosition: argument
    "Set the source position saved for error notification."

    savePosition := argument! !

!FSABasedLookaheadScanner methodsFor: 'accessing'!

errorPosition
    "Answer the source position of the last acceptable character: the saved position, never less than 1."

    ^self savePosition max: 1! !

FSABasedLookaheadScanner subclass: #FSABasedScannerWithOneTokenLookahead
    instanceVariableNames: ''
    classVariableNames: ''
    poolDictionaries: ''
    category: 'Compilers-Scanners'!

FSABasedScannerWithOneTokenLookahead comment: '================================================= Copyright (c) 1992 by Justin O. Graver. All rights reserved (with exceptions). For complete information evaluate "Object tgenCopyright." ================================================= This class provides a scanner with simple one-token lookahead. '!

!FSABasedScannerWithOneTokenLookahead methodsFor: 'scanner generation'!

defaultOptimizedScannerClass
    "Answer the table-based scanner class used by fastScanner."

    ^OptimizedScannerWithOneTokenLookahead! !

!FSABasedScannerWithOneTokenLookahead methodsFor: 'scanning'!

scanToken
    "Scan the next token and compute its token type. Every state entered is pushed on a stack so that, when the longest scan stops in a non-final state, the scanner can put characters back until the top of the stack is a final state. If it gets back to the start state the failing transition is re-sent without a default so that the state signals the error."

    | nextState tok typeAction stateStack saveChar saveState |
    stateStack := Stack new.
    self atEnd
        ifTrue: [self signalEndOfInput]
        ifFalse:
            [stateStack push: self startState.
            [(nextState := stateStack top transitionFor: self nextChar ifNone: [nil]) isNil]
                whileFalse:
                    [stateStack push: nextState.
                    self getNextChar].
            "save the current position for error notification"
            self savePosition: self position + (self atEnd ifTrue: [1] ifFalse: [0]).
            stateStack top isFSAFinalState
                ifFalse:
                    ["remember where the scan got stuck so the error can be signalled later"
                    saveChar := self nextChar.
                    saveState := stateStack top.
                    "backup to the previous final state or to the start state"
                    [stateStack size = 1 or: [stateStack top isFSAFinalState]]
                        whileFalse:
                            [stateStack pop.
                            self putBackChar].
                    stateStack size = 1
                        ifTrue:
                            ["backed up to the start state so signal an error"
                            saveState transitionFor: saveChar]].
            "answer the newly scanned token"
            tok := self buffer contents.
            typeAction := stateStack top tokenTypeAndActionFor: tok.
            self tokenType: typeAction type.
            self token: tok.
            self buffer reset.
            typeAction action notNil ifTrue: [self perform: typeAction action]]! !

FSABasedLookaheadScanner subclass: #FSABasedScannerWithTwoTokenLookahead
    instanceVariableNames: 'stateStack saveState saveChar '
    classVariableNames: ''
    poolDictionaries: ''
    category: 'Compilers-Scanners'!

FSABasedScannerWithTwoTokenLookahead comment: '================================================= Copyright (c) 1992 by Justin O. Graver. All rights reserved (with exceptions). For complete information evaluate "Object tgenCopyright." ================================================= This class provides a scanner with simple two-token lookahead. Instance Variables: stateStack - primary state stack for scanning tokens. saveState - the non-final state at which the last scan stopped, kept for error notification. saveChar - the lookahead character that had no transition from saveState, kept for error notification.'!

!FSABasedScannerWithTwoTokenLookahead methodsFor: 'scanner generation'!

defaultOptimizedScannerClass
    "Answer the table-based scanner class used by fastScanner."

    ^OptimizedScannerWithTwoTokenLookahead! !

!FSABasedScannerWithTwoTokenLookahead methodsFor: 'scanning'!

checkForTokenIn: newStateStack buffer: charBuffer
    "Scan the input using the arguments: states are pushed on newStateStack and characters are collected in charBuffer instead of my own buffer. Answer true if a legal token (or no illegal token) was found and false otherwise. When the scan fails while newStateStack is my primary stateStack the error is signalled here, by re-sending the failing transition without a default."

    | nextState |
    self atEnd
        ifFalse:
            [newStateStack push: self startState.
            "look for longest possible token"
            [(nextState := newStateStack top transitionFor: self nextChar ifNone: [nil]) isNil]
                whileFalse:
                    [newStateStack push: nextState.
                    "getNextChar for local vars"
                    charBuffer nextPut: self nextChar.
                    self nextChar: self source next].
            "save the current position for error notification"
            self savePosition: self position + (self atEnd ifTrue: [1] ifFalse: [0]).
            newStateStack top isFSAFinalState
                ifFalse:
                    ["remember the stuck state and character for error notification"
                    self saveChar: self nextChar.
                    self saveState: newStateStack top.
                    "backup to the previous final state or to the start state"
                    [newStateStack size = 1 or: [newStateStack top isFSAFinalState]]
                        whileFalse:
                            [newStateStack pop.
                            "putBackChar for local vars"
                            charBuffer backspace.
                            self backspaceSource].
                    newStateStack size = 1
                        ifTrue:
                            ["backed up to the start state"
                            self stateStack == newStateStack
                                ifTrue:
                                    ["this is the first token, so signal an error (abort and return)"
                                    self saveState transitionFor: self saveChar]
                                ifFalse:
                                    ["we may be able to backup in the previous token"
                                    ^false]]]].
    ^true!

scanToken
    "Scan the next token and compute its token type. The token delivered is the one already held in my buffer and stateStack; before delivering it the following token is scanned into a fresh stack and buffer, which then become the current ones. If that following scan finds no legal token, the current token is shortened to its next smaller legal prefix and the whole step is retried."

    | tok typeAction newStateStack charBuffer |
    newStateStack := Stack new.
    charBuffer := RetractableWriteStream on: (String new: 32).
    (self checkForTokenIn: newStateStack buffer: charBuffer)
        ifTrue:
            ["either a legal token or the end of input was found"
            self stateStack isEmpty
                ifTrue:
                    [self atEnd
                        ifTrue: [^self signalEndOfInput]
                        ifFalse: [self error: 'no more valid tokens']].
            tok := self buffer contents.
            typeAction := self stateStack top tokenTypeAndActionFor: tok.
            self tokenType: typeAction type.
            self token: tok.
            "the token just scanned ahead becomes the current one"
            self buffer: charBuffer.
            self stateStack: newStateStack.
            typeAction action notNil ifTrue: [self perform: typeAction action]]
        ifFalse:
            ["an illegal token was found, try to look for earlier final state in current token buffers"
            charBuffer size timesRepeat:
                ["put back illegal token chars"
                self backspaceSource].
            "backup in current token to next smallest legal token"
            [self stateStack size = 1 or:
                [self stateStack pop.
                self putBackChar.
                self stateStack top isFSAFinalState]] whileFalse.
            self stateStack size = 1
                ifTrue:
                    ["no smaller legal token so signal error"
                    self saveState transitionFor: self saveChar]
                ifFalse:
                    ["try again"
                    self scanToken]]! !

!FSABasedScannerWithTwoTokenLookahead methodsFor: 'state accessing'!

saveChar
    "Answer the lookahead character recorded when a scan stopped in a non-final state."

    ^saveChar!

saveChar: argument
    "Record the lookahead character at which a scan stopped."

    saveChar := argument!

saveState
    "Answer the non-final state recorded when a scan stopped."

    ^saveState!

saveState: argument
    "Record the non-final state at which a scan stopped."

    saveState := argument!

stateStack
    "Answer the primary state stack, holding the states of the current token."

    ^stateStack!

stateStack: argument
    "Set the primary state stack."

    stateStack := argument! !

!FSABasedScannerWithTwoTokenLookahead methodsFor: 'initialization'!

reset
    "Reset the initial state of the scanner before scanning a new source."

    super reset.
    self stateStack: Stack new!

scanSource: aString
    "Convert the input string to a read stream and scan the first token. The first token is pre-scanned into my own stateStack and buffer so that scanToken can deliver it while looking one token further ahead."

    self reset.
    self source: (RetractableReadStream on: aString).
    self nextChar: self source next.
    self checkForTokenIn: self stateStack buffer: self buffer.
    self scanToken! !

"HandCodedScanner classifies characters through a type table and relies on hand-written scanning methods."
AbstractScanner subclass: #HandCodedScanner
    instanceVariableNames: 'charTypeTable '
    classVariableNames: ''
    poolDictionaries: ''
    category: 'Compilers-Scanners'!

HandCodedScanner comment: '================================================= Copyright (c) 1992 by Justin O. Graver. All rights reserved (with exceptions). For complete information evaluate "Object tgenCopyright." ================================================= I am an abstract class of scanner that scans a source string and breaks it up into tokens using a character type table and hand-coded scanner methods. Specific type tables are stored in class instance variables of my concrete subclasses. Instance Variables: charTypeTable - a local reference to the type table for this class of scanner; the ascii value of each character is mapped to a symbol token type. '!

!HandCodedScanner methodsFor: 'state accessing'!

charTypeTable
    "Answer the character type table used by this scanner."

    ^charTypeTable!

charTypeTable: argument
    "Set the character type table used by this scanner."

    charTypeTable := argument! !

!HandCodedScanner methodsFor: 'initialization'!

init
    "Initialize the inherited state, then take the type table from my class."

    super init.
    self charTypeTable: self myTypeTable! !

!HandCodedScanner methodsFor: 'accessing'!
endOfInputToken
    "Answer the token that stands for the end of the input; for hand-coded scanners this is nil."

    ^nil!

endOfInputTokenType
    "Answer the token type that stands for the end of the input."

    ^#doIt!

myTypeTable
    "Answer the character type table stored on my class."

    ^self class charTypeTable! !

!HandCodedScanner methodsFor: 'testing'!

atStartOfComplexToken
    "Answer whether the current token type begins with the character x."

    | leading |
    leading := self tokenType at: 1.
    ^leading = $x! !

!HandCodedScanner methodsFor: 'scanning'!

scanToken
    "Scan the next token and compute its token type. Characters whose table entry is #xDelimiter are skipped. For a complex token type (one starting with x) the type symbol itself is performed to compute token and type; otherwise the token is the single lookahead character. This may be overridden in subclasses for efficiency and customization."

    | charType |
    [self atEnd ifTrue: [^self signalEndOfInput].
    charType := self charTypeTable at: self nextChar asInteger.
    self tokenType: charType.
    self tokenType == #xDelimiter]
        whileTrue:
            ["delimiters are common, so skip them in a tight loop"
            self getNextChar].
    self atStartOfComplexToken
        ifTrue:
            ["the type symbol names the method that scans this token"
            self perform: tokenType.
            ^self].
    "a simple token is just the character itself"
    self token: self nextChar.
    self getNextChar! !

"-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- "!

HandCodedScanner class
    instanceVariableNames: 'charTypeTable '!

!HandCodedScanner class methodsFor: 'class initialization'!

initialize
    "Install a default table of 256 entries, all #xDefault. Concrete subclasses must provide a real character type table."
    "HandCodedScanner initialize"

    self charTypeTable: (Array new: 256 withAll: #xDefault)! !

!HandCodedScanner class methodsFor: 'state accessing'!

charTypeTable
    "Answer the character type table shared by instances of this class."

    ^charTypeTable!

charTypeTable: aTable
    "Set the character type table shared by instances of this class."

    charTypeTable := aTable! !

"OptimizedScanner replaces the object FSA by integer-indexed tables: fsa maps a state number to an array of successor state numbers indexed by character code, and finalStateTable describes the final states."
FSABasedScanner subclass: #OptimizedScanner
    instanceVariableNames: 'finalStateTable '
    classVariableNames: 'NoTransitionSignal '
    poolDictionaries: ''
    category: 'Compilers-Scanners'!

OptimizedScanner comment: '================================================= Copyright (c) 1992 by Justin O. Graver. All rights reserved (with exceptions). For complete information evaluate "Object tgenCopyright."
================================================= I am an abstract class of scanner that scans a source string and breaks it up into tokens using a table created by converting FSA to integer. instance Variables: finalStateTable - a table that maps integer ( represented as final state ) to literal tokens and token classes. '!

!OptimizedScanner methodsFor: 'converting'!

assignNextIDAfter: id toSuccessorOf: state
    "Number, depth first, every state reachable from state that has no stateID yet, starting with id + 1. Final states are also recorded in finalStateTable under their new number. Answer the first number that is still unused."

    | freeID successor |
    freeID := id + 1.
    state edgeLabelMap
        associationsDo:
            [:edge |
            successor := edge value.
            successor stateID isNil
                ifTrue:
                    [successor stateID: freeID.
                    (successor isFSAFinalState and: [(finalStateTable includes: successor) not])
                        ifTrue: [finalStateTable at: freeID put: successor].
                    freeID := self assignNextIDAfter: freeID toSuccessorOf: successor]].
    ^freeID!

changeFSAToObjectTable: fsaState
    "Number the states reachable from fsaState, beginning with startState for fsaState itself, and answer an Array indexed by state number holding one transition row per state. As a side effect finalStateTable is rebuilt, first as a Dictionary and finally as an Array."

    | firstFreeID stateCount transitions |
    fsaState stateID isNil
        ifFalse: [fsaState nilOutStateIDs].
    fsaState stateID: self startState.
    self finalStateTable: Dictionary new.
    firstFreeID := self assignNextIDAfter: self startState toSuccessorOf: fsaState.
    stateCount := firstFreeID - 1.
    transitions := Array new: stateCount.
    self convert: fsaState to: transitions.
    "turn the Dictionary of final states into an Array for speed"
    self modifyFSAFinalStates: stateCount.
    ^transitions!

convert: state to: objectTable
    "Store at the stateID of state in objectTable a 127-slot row that maps the integer value of each edge label to the stateID of the successor, converting successors that have no row yet first. Answer objectTable."

    | row successor |
    row := Array new: 127.
    objectTable at: state stateID put: row.
    state edgeLabelMap
        associationsDo:
            [:edge |
            successor := edge value.
            (objectTable at: successor stateID) isNil
                ifTrue: [self convert: successor to: objectTable].
            row at: edge key asInteger put: successor stateID].
    ^objectTable!

convertToTable: fsaScanner
    "Replace my fsa by the table form of the fsa of fsaScanner."

    | table |
    table := self changeFSAToObjectTable: fsaScanner fsa.
    self fsa: table!

modifyFSAFinalStates: index
    "Replace the Dictionary in finalStateTable by an Array of size index. The slot at the stateID of each final state holds a two-element Array: its literal tokens as an Array and its token classes as an Array."

    | entry table |
    table := Array new: index.
    finalStateTable
        do:
            [:finalState |
            entry := Array
                        with: finalState literalTokens asOrderedCollection asArray
                        with: finalState tokenClasses asArray.
            table at: finalState stateID put: entry].
    self finalStateTable: table! !

!OptimizedScanner methodsFor: 'private'!

at: state transitionFor: char
    "Answer the successor state number for char from state, or nil when there is none. When there is none and state is not a final state, raise the no-transition exception first, with a message that depends on whether char is the end-of-input token."

    | target message |
    target := (fsa at: state) at: char asInteger.
    (target isNil and: [(finalStateTable at: state) isNil])
        ifTrue:
            [message := char == self endOfInputToken
                        ifTrue: [self endOfInputErrorString]
                        ifFalse: [self standardErrorString , '''' , char printString , ''''].
            self raiseNoTransitionExceptionErrorString: message].
    ^target! !

!OptimizedScanner methodsFor: 'initialization'!

init
    "Initialize the inherited state, then take the final state table from my class."

    super init.
    self finalStateTable: self myFinalStateTable! !

!OptimizedScanner methodsFor: 'state accessing'!

finalStateTable
    "Answer the table describing the final states."

    ^finalStateTable!

finalStateTable: aTable
    "Set the table describing the final states."

    finalStateTable := aTable! !

!OptimizedScanner methodsFor: 'accessing'!

myFinalStateTable
    "Answer the final state table stored on my class."

    ^self class finalStateTable!

startState
    "Answer the number of the start state."

    ^1! !

!OptimizedScanner methodsFor: 'exception handling'!

endOfInputErrorString
    "Answer the error text used when the input ends unexpectedly."

    ^'end of input encountered'!

raiseNoTransitionExceptionErrorString: aString
    "Raise the no-transition signal of my class with aString as error string."

    self class noTransitionSignal raiseErrorString: aString!

standardErrorString
    "Answer the prefix of the error text used for an illegal character."

    ^'illegal character encountered:  '! !

!OptimizedScanner methodsFor: 'testing'!

atEnd
    "Answer whether the lookahead is identical to the end-of-input token (the end-of-file character)."

    ^nextChar == self endOfInputToken! !

!OptimizedScanner methodsFor: 'reconstructing'!

reconstructFinalStateTableOn: aStream
    "Write on aStream source text that assigns the reconstructed final state table to the variable table and then passes it to constructFinalStateTable:."

    aStream nextPutAll: 'table := '.
    finalStateTable reconstructOn: aStream.
    aStream
        period;
        crtab;
        nextPutAll: 'self constructFinalStateTable: table'!

reconstructFSAOn: aStream
    "Write on aStream source text that stores the reconstructed transition table with fsa:."

    aStream nextPutAll: 'self fsa: '.
    fsa reconstructOn: aStream.
    aStream
        period;
        crtab!

reconstructOn: aStream
    "Write on aStream the source text that recreates the fsa and then the final state table."

    self reconstructFSAOn: aStream.
    self reconstructFinalStateTableOn: aStream! !

!OptimizedScanner methodsFor: 'scanner generation'!

classInitializationMethodTextForClassNamed: name spec: tokenSpec
    "Answer the source text of a class-side initialize method for the scanner class called name. The text holds a comment with the initialization expression, a comment containing tokenSpec (with embedded double-quote characters doubled), a declaration of the temporary table, and the statements that rebuild my tables."

    | out |
    out := self newStreamForMethodRendering.
    out
        nextPutAll: 'initialize';
        crtab;
        nextPut: $";
        nextPutAll: name;
        nextPutAll: ' initialize"';
        crtab;
        nextPut: $".
    tokenSpec
        do:
            [:each |
            out nextPut: each.
            "a double-quote character inside the spec must be written twice"
            each = $" ifTrue: [out nextPut: $"]].
    out
        nextPut: $";
        cr;
        crtab;
        nextPutAll: '| table |';
        crtab.
    self reconstructOn: out.
    ^out contents! !

!OptimizedScanner methodsFor: 'scanning'!

at: state tokenTypeAndActionFor: aString
    "Answer a TokenTypeActionHolder for aString recognized in final state number state. If aString is one of the literal tokens of the state its type is aString itself and it has no action; otherwise type and action come from the first token class of the state. Overlapping token classes are not handled, so a final state stands for literals or for a single token class."

    | entry classification |
    entry := finalStateTable at: state.
    ((entry at: 1) includes: aString)
        ifTrue: [^TokenTypeActionHolder type: aString action: nil].
    classification := (entry at: 2) first.
    ^TokenTypeActionHolder type: classification tokenType action: classification action!

getNextChar
    "Append the lookahead character to the buffer and read the next one from the source, using the instance variables directly. The original note says the source answers an eof character when no more input is available. Subclasses may override this to avoid unnecessary buffering."

    buffer nextPut: nextChar.
    nextChar := source next!

signalEndOfInput
    "Put the scanner in the end-of-input state: token and token type both become the end-of-input token."

    token := self endOfInputToken.
    tokenType := token! !

"-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- "!

OptimizedScanner class
    instanceVariableNames: 'finalStateTable tokenTable '!

!OptimizedScanner class methodsFor: 'state accessing'!

finalStateTable
    "Answer the final state table shared by instances of this class."

    ^finalStateTable!

finalStateTable: aTable
    "Set the final state table shared by instances of this class."

    finalStateTable := aTable!

noTransitionSignal
    "Answer the signal raised when no transition exists."

    ^NoTransitionSignal!

noTransitionSignal: aSignal
    "Set the signal raised when no transition exists."

    NoTransitionSignal := aSignal!

tokenTable
    "Answer the value of the tokenTable class instance variable."

    ^tokenTable!

tokenTable: aTable
    "Set the value of the tokenTable class instance variable."

    tokenTable := aTable! !

!OptimizedScanner class methodsFor: 'class initialization'!

initialize
    "Create and store the signal raised when no transition exists."
    "OptimizedScanner initialize"

    | signal |
    signal := Signal new nameClass: self message: #noTransitionSymbol.
    self noTransitionSignal: signal! !

!OptimizedScanner class methodsFor: 'reconstructing'!

constructFinalStateTable: arg
    "Rebuild finalStateTable from arg, an indexable collection with one slot per state. A nil slot stays nil; any other slot becomes a two-element Array holding its first element unchanged and the token classifications built from its second element."

    | entry |
    finalStateTable := Array new: arg size.
    1 to: arg size do:
        [:index |
        entry := arg at: index.
        "slots of a new Array are nil already, so only non-nil entries are stored"
        entry isNil
            ifFalse:
                [finalStateTable
                    at: index
                    put: (Array
                            with: (entry at: 1)
                            with: (self constructTokenClassification: (entry at: 2)))]]!

constructTokenClassification: aCollection
    "Answer an Array with one TokenClassification per element of aCollection; every element supplies the token type at index 1 and the action at index 2."

    | pair classifications |
    aCollection size == 1
        ifTrue:
            [pair := aCollection first.
            ^Array with: (TokenClassification tokenType: (pair at: 1) action: (pair at: 2))].
    classifications := Array new: aCollection size.
    1 to: aCollection size do:
        [:index |
        pair := aCollection at: index.
        classifications
            at: index
            put: (TokenClassification tokenType: (pair at: 1) action: (pair at: 2))].
    ^classifications! !

!OptimizedScanner class methodsFor: 'instance creation'!

buildFrom: fsaScanner
    "Answer what a new instance answers to convertToTable: with fsaScanner."

    ^self new convertToTable: fsaScanner! !

"OptimizedLookaheadScanner adds savePosition, the source position remembered for error notification."
OptimizedScanner subclass: #OptimizedLookaheadScanner
    instanceVariableNames: 'savePosition '
    classVariableNames: ''
    poolDictionaries: ''
    category: 'Compilers-Scanners'!

OptimizedLookaheadScanner comment: '================================================= Copyright (c) 1992 by Justin O. Graver. All rights reserved (with exceptions). For complete information evaluate "Object tgenCopyright." ================================================= This is an abstract class for table-based optimized scanners with lookahead. Instance Variables: savePosition - pointer into input source for error notification.'!

!OptimizedLookaheadScanner methodsFor: 'accessing'!

errorPosition
    "Answer the source position of the last acceptable character: the saved position, never less than 1."

    ^self savePosition max: 1! !

!OptimizedLookaheadScanner methodsFor: 'initialization'!

reset
    "Return to the pre-scan state and set the saved position to 0."

    super reset.
    self savePosition: 0! !

!OptimizedLookaheadScanner methodsFor: 'state accessing'!

savePosition
    "Answer the source position saved for error notification."

    ^savePosition!

savePosition: anInteger
    "Set the source position saved for error notification."

    savePosition := anInteger! !

!OptimizedLookaheadScanner methodsFor: 'testing'!

isFSAFinalState: aState
    "Answer whether state number aState has an entry in the final state table."

    ^(self finalStateTable at: aState) notNil! !
"Table-based scanner that can back up within the token being scanned to the last final state it passed."
OptimizedLookaheadScanner subclass: #OptimizedScannerWithOneTokenLookahead
    instanceVariableNames: ''
    classVariableNames: ''
    poolDictionaries: ''
    category: 'Compilers-Scanners'!

OptimizedScannerWithOneTokenLookahead comment: '================================================= Copyright (c) 1992 by Justin O. Graver. All rights reserved (with exceptions). For complete information evaluate "Object tgenCopyright." ================================================= This class provides a table-based optimized scanner with simple one-token lookahead. '!

!OptimizedScannerWithOneTokenLookahead methodsFor: 'scanning'!

scanToken
    "Scan the next token and compute its token type. Each state number entered is pushed on a stack. If the longest scan stops in a non-final state, characters are put back until the top of the stack is a final state; if that leads back to the start state the failing transition is looked up again through at:transitionFor: so that the error is reported there."

    | successor text holder states failedChar failedState |
    states := Stack new.
    self atEnd
        ifTrue:
            [self signalEndOfInput.
            ^self].
    states push: self startState.
    "follow the transition table for as long as the lookahead has a successor"
    [(successor := (fsa at: states top) at: self nextChar asInteger) notNil]
        whileTrue:
            [states push: successor.
            self getNextChar].
    "remember the current position for error notification"
    self savePosition: self position + (self atEnd ifTrue: [1] ifFalse: [0]).
    (self isFSAFinalState: states top)
        ifFalse:
            ["remember the state and character at which the scan got stuck"
            failedChar := self nextChar.
            failedState := states top.
            "retreat to the previous final state, or all the way to the start state"
            [states size = 1 or: [self isFSAFinalState: states top]]
                whileFalse:
                    [states pop.
                    self putBackChar].
            "back at the start state: no prefix is a legal token, so report the failure"
            states size = 1
                ifTrue: [self at: failedState transitionFor: failedChar]].
    "deliver the token that was scanned"
    text := self buffer contents.
    holder := self at: states top tokenTypeAndActionFor: text.
    self tokenType: holder type.
    self token: text.
    self buffer reset.
    holder action isNil
        ifFalse: [self perform: holder action]! !
"Table-based scanner that scans one token ahead of the token it delivers, so that the delivered token can still be shortened when the following text is not a legal token."
OptimizedLookaheadScanner subclass: #OptimizedScannerWithTwoTokenLookahead
    instanceVariableNames: 'stateStack saveState saveChar '
    classVariableNames: ''
    poolDictionaries: ''
    category: 'Compilers-Scanners'!

OptimizedScannerWithTwoTokenLookahead comment: '================================================= Copyright (c) 1992 by Justin O. Graver. All rights reserved (with exceptions). For complete information evaluate "Object tgenCopyright." ================================================= This class provides a table-based optimized scanner with simple two-token lookahead. Instance Variables: stateStack - primary state stack for scanning tokens. saveState - the non-final state at which the last scan stopped, kept for error notification. saveChar - the lookahead character that had no transition from saveState, kept for error notification.'!

!OptimizedScannerWithTwoTokenLookahead methodsFor: 'scanning'!

checkForTokenIn: newStateStack buffer: charBuffer
    "Scan the input using the arguments: state numbers are pushed on newStateStack and characters are collected in charBuffer instead of my own buffer. Answer true if a legal token (or no illegal token) was found and false otherwise. When the scan fails while newStateStack is my primary stateStack, the failing transition is looked up again through at:transitionFor: so that the error is reported there."

    | nextState |
    self atEnd
        ifFalse:
            [newStateStack push: self startState.
            "look for longest possible token"
            [(nextState := (fsa at: newStateStack top) at: self nextChar asInteger) isNil]
                whileFalse:
                    [newStateStack push: nextState.
                    "getNextChar for local vars"
                    charBuffer nextPut: self nextChar.
                    self nextChar: self source next].
            "save the current position for error notification"
            self savePosition: self position + (self atEnd ifTrue: [1] ifFalse: [0]).
            (self isFSAFinalState: newStateStack top)
                ifFalse:
                    ["remember the stuck state and character for error notification"
                    saveChar := self nextChar.
                    saveState := newStateStack top.
                    "backup to the previous final state or to the start state"
                    [newStateStack size = 1 or: [self isFSAFinalState: newStateStack top]]
                        whileFalse:
                            [newStateStack pop.
                            "putBackChar for local vars"
                            charBuffer backspace.
                            self backspaceSource].
                    newStateStack size = 1
                        ifTrue:
                            ["backed up to the start state"
                            self stateStack == newStateStack
                                ifTrue:
                                    ["this is the first token, so signal an error (abort and return)"
                                    self at: saveState transitionFor: saveChar]
                                ifFalse:
                                    ["we may be able to backup in the previous token"
                                    ^false]]]].
    ^true!

scanToken
    "Scan the next token and compute its token type. The token delivered is the one already held in my buffer and stateStack; before delivering it the following token is scanned into a fresh stack and buffer, which then become the current ones. If that following scan finds no legal token, the current token is shortened to its next smaller legal prefix and the whole step is retried."

    | tok typeAction newStateStack charBuffer |
    newStateStack := Stack new.
    charBuffer := RetractableWriteStream on: (String new: 32).
    (self checkForTokenIn: newStateStack buffer: charBuffer)
        ifTrue:
            ["either a legal token or the end of input was found"
            self stateStack isEmpty
                ifTrue:
                    [self atEnd
                        ifTrue: [^self signalEndOfInput]
                        ifFalse: [self error: 'no more valid tokens']].
            tok := self buffer contents.
            typeAction := self at: stateStack top tokenTypeAndActionFor: tok.
            self tokenType: typeAction type.
            self token: tok.
            "the token just scanned ahead becomes the current one"
            self buffer: charBuffer.
            self stateStack: newStateStack.
            typeAction action notNil ifTrue: [self perform: typeAction action]]
        ifFalse:
            ["an illegal token was found, try to look for earlier final state in current token buffers"
            charBuffer size timesRepeat:
                ["put back illegal token chars"
                self backspaceSource].
            "backup in current token to next smallest legal token"
            [self stateStack size = 1 or:
                [self stateStack pop.
                self putBackChar.
                self isFSAFinalState: stateStack top]] whileFalse.
            self stateStack size = 1
                ifTrue:
                    ["no smaller legal token so signal error"
                    self at: saveState transitionFor: saveChar]
                ifFalse:
                    ["try again"
                    self scanToken]]! !

!OptimizedScannerWithTwoTokenLookahead methodsFor: 'initialization'!

reset
    "Reset the initial state of the scanner before scanning a new source."

    super reset.
    self stateStack: Stack new!

scanSource: aString
    "Convert the input string to a read stream and scan the first token. The first token is pre-scanned into my own stateStack and buffer so that scanToken can deliver it while looking one token further ahead."

    self reset.
    self source: (RetractableReadStream on: aString).
    self nextChar: self source next.
    self checkForTokenIn: self stateStack buffer: self buffer.
    self scanToken! !
!OptimizedScannerWithTwoTokenLookahead methodsFor: 'state accessing'!

saveChar
    "Answer the lookahead character recorded when a scan stopped in a non-final state."

    ^saveChar!

saveChar: aCharacter
    "Record the lookahead character at which a scan stopped."

    saveChar := aCharacter!

saveState
    "Answer the state number recorded when a scan stopped in a non-final state."

    ^saveState!

saveState: aState
    "Record the state number at which a scan stopped."

    saveState := aState!

stateStack
    "Answer the primary state stack."

    ^stateStack!

stateStack: aStack
    "Set the primary state stack."

    stateStack := aStack! !

"Run the class-side initialization of the scanner classes that define one."
FSABasedScanner initialize!

HandCodedScanner initialize!

OptimizedScanner initialize!