ParseTreeNode subclass: #RegularExpressionNode instanceVariableNames: '' classVariableNames: '' poolDictionaries: '' category: 'T-gen-Regular Expression Nodes'! RegularExpressionNode comment: '================================================= Copyright (c) 1992 by Justin O. Graver. All rights reserved (with exceptions). For complete information evaluate "Object tgenCopyright." ================================================= I am the root of a regular expression tree.'! !RegularExpressionNode methodsFor: 'private'! alternationNodeClass ^AlternationNode! characterNodeClass ^CharacterNode! concatenationNodeClass ^ConcatenationNode! createNewProductionWithLHS: lhsNode andRHS: anOrderedCollection "Create a new RRPGProductionNode given its lhs and a collection of regular expression nodes whose concatenation forms the rhs. Note: anOrderedCollection may contain ConcatenationNodes which need to be 'expanded' to avoid nesting within the ConcatenationNode created by this method. Since each transformation is done individually (i.e. this method gets invoked each time a new production is created), the CatenationNodes will never be nested more then one deep." | rhs rhsNode childNodes | rhs := anOrderedCollection isEmpty ifTrue: [self epsilonNodeClass new] ifFalse: [anOrderedCollection size = 1 ifTrue: [anOrderedCollection first] ifFalse: [childNodes := OrderedCollection new. anOrderedCollection do: [:node | node isConcatenationNode ifTrue: [childNodes addAllLast: node children] ifFalse: [childNodes addLast: node]]. self concatenationNodeClass children: childNodes]]. rhsNode := self rhsNodeClass symbols: rhs translationSymbol: nil. ^self productionNodeClass leftHandSide: lhsNode rightHandSides: rhsNode! epsilon "Answer an object used to represent the empty string (epsilon)." ^self epsilonNodeClass epsilon! epsilonNodeClass ^EpsilonNode! fsaFinalStateClass ^FSAFinalState! fsaStateClass ^FSAState! nameExtensionString "Answer a string appropriate for creating new nonterminals by suffixing." ^'0'! newNonterminal: lhsNode with: lhsNames | lhs | lhs := lhsNode. lhsNames isEmpty ifTrue: [lhsNames add: lhs] ifFalse: [[lhsNames includes: lhs] whileTrue: [lhs := lhs , '`']. lhsNames add: lhs]. ^NonterminalNode new setAttribute: lhs! newNonterminalFrom: lhsNode excluding: lhsNames "Answer a new nonterminal based on lhsNode that is not already in lhsNames." | newName | newName := lhsNode symbol. [newName := newName , self nameExtensionString. lhsNames includes: newName] whileTrue. lhsNames add: newName. ^self nonterminalNodeClass symbol: newName! nonterminalNodeClass ^NonterminalNode! productionNodeClass ^RRPGProductionNode! rhsNodeClass ^RRPGRightHandSideNode! starClosureNodeClass ^StarClosureNode! tokenClassificationClass ^TokenClassification! ! !RegularExpressionNode methodsFor: 'converting'! asDFSA | fsa | fsa := self asFSA. ^fsa asDeterministicFSA! asFSA | startState finalState | startState := self fsaStateClass new. finalState := self fsaFinalStateClass new. self asFSAStartingAt: startState endingAt: finalState. ^startState! asFSAStartingAt: startState endingAt: finalState self subclassResponsibility! asFSAWithLiteral: literal startingAt: startState | finalState | "First, build main fsa." finalState := self fsaFinalStateClass new. self asFSAStartingAt: startState endingAt: finalState. finalState addLiteralToken: literal. ^startState! asFSAWithType: type andAction: action startingAt: startState | finalState | "First, build main fsa." finalState := self fsaFinalStateClass new. self asFSAStartingAt: startState endingAt: finalState. finalState addTokenClass: (self tokenClassificationClass tokenType: type action: action). ^startState! asPureRegExpr "Answer a new version of the receiver consisting of only characters, concatenations, alternations, and (star) closures. Also, eliminate single child alternations and concatenations." ^self "default"! minimize: recognizer ^recognizer asMinimalDFSA! ! !RegularExpressionNode methodsFor: 'traversing'! addNonemptyLeavesTo: aSet ^self "default is do nothing"! collectNonemptyLeavesIn: aSet self postorderDo: [:child | child addNonemptyLeavesTo: aSet]! ! !RegularExpressionNode methodsFor: 'transforming'! needsTransforming ^false! ! !RegularExpressionNode methodsFor: 'building parse trees'! alternateWith: node ^node alternateWithNonAltNode: self! alternateWithAltNode: node node children addLast: self. ^node! alternateWithNonAltNode: node ^self alternationNodeClass children: (OrderedCollection with: node with: self)! concatenateWith: node ^node concatenateWithNonCatNode: self! concatenateWithCatNode: node node children addLast: self. ^node! concatenateWithNonCatNode: node ^self concatenationNodeClass children: (OrderedCollection with: node with: self)! ! RegularExpressionNode subclass: #EpsilonNode instanceVariableNames: '' classVariableNames: '' poolDictionaries: '' category: 'T-gen-Regular Expression Nodes'! EpsilonNode comment: '================================================= Copyright (c) 1992 by Justin O. Graver. All rights reserved (with exceptions). For complete information evaluate "Object tgenCopyright." ================================================= I represent the empty regular expression.'! !EpsilonNode methodsFor: 'building parse trees'! concatenateWith: node ^node! concatenateWithCatNode: node ^node! concatenateWithNonCatNode: node ^node! ! !EpsilonNode methodsFor: 'printing'! printOn: aStream aStream nextPutAll: ''! ! !EpsilonNode methodsFor: 'converting'! asFSAStartingAt: startState endingAt: finalState startState goto: finalState on: self epsilon! ! !EpsilonNode methodsFor: 'collecting'! collectSymbol ^OrderedCollection new! ! !EpsilonNode methodsFor: 'testing'! isEpsilonNode ^true! ! !EpsilonNode methodsFor: 'transforming'! transformUsing: lhsNames withLHS: lhsNode alpha: alphaNodes gamma: gammaNodes "Productions of the form 'A -> ' do not need transforming. Signal this by answering an empty collection." ^OrderedCollection new! ! "-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- "! EpsilonNode class instanceVariableNames: ''! !EpsilonNode class methodsFor: 'constants'! epsilon "Answer an object used to represent the empty string." ^Character endOfInput! ! RegularExpressionNode subclass: #BinaryRegExprNode instanceVariableNames: 'leftChild rightChild ' classVariableNames: '' poolDictionaries: '' category: 'T-gen-Regular Expression Nodes'! BinaryRegExprNode comment: '================================================= Copyright (c) 1992 by Justin O. Graver. All rights reserved (with exceptions). For complete information evaluate "Object tgenCopyright." ================================================= I am a binary regular expression. Instance Variables: leftChild rightChild '! !BinaryRegExprNode methodsFor: 'building parse trees'! addChildrenFirst: anOrderedCollection anOrderedCollection size = 1 ifTrue: [self leftChild: anOrderedCollection removeFirst] ifFalse: [self error: 'wrong number of children']! addChildrenInitial: anOrderedCollection anOrderedCollection size = 2 ifTrue: [self leftChild: anOrderedCollection removeFirst. self rightChild: anOrderedCollection removeFirst] ifFalse: [anOrderedCollection size = 1 ifTrue: [self rightChild: anOrderedCollection removeFirst] ifFalse: [self error: ' wrong number of children']]! ! !BinaryRegExprNode methodsFor: 'state accessing'! leftChild ^leftChild! leftChild: argument leftChild := argument! rightChild ^rightChild! rightChild: argument rightChild := argument! ! !BinaryRegExprNode methodsFor: 'traversing'! childrenDo: aBlock "Evaluate aBlock for each of my children." aBlock value: self leftChild. aBlock value: self rightChild! updateChildrenUsing: aBlock "Replace my children according to the value of aBlock." self leftChild: (aBlock value: self leftChild). self rightChild: (aBlock value: self rightChild)! ! "-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- "! BinaryRegExprNode class instanceVariableNames: ''! !BinaryRegExprNode class methodsFor: 'instance creation'! leftChild: arg1 rightChild: arg2 | newMe | newMe := self new. newMe leftChild: arg1. newMe rightChild: arg2. ^newMe! ! RegularExpressionNode subclass: #CharRangeNode instanceVariableNames: 'firstChar lastChar ' classVariableNames: '' poolDictionaries: '' category: 'T-gen-Regular Expression Nodes'! CharRangeNode comment: '================================================= Copyright (c) 1992 by Justin O. Graver. All rights reserved (with exceptions). For complete information evaluate "Object tgenCopyright." ================================================= I represent the alternations of each atomic character in a range. I.e. ''firstChar-lastChar'' = ''firstChar | ... | lastChar''. I may only appear as a child of a AlternationRangeNode or a ComplementedAlternationRangeNode. Instance Variables: firstChar lastChar '! !CharRangeNode methodsFor: 'state accessing'! firstChar ^firstChar! firstChar: argument firstChar := argument! lastChar ^lastChar! lastChar: argument lastChar := argument! ! !CharRangeNode methodsFor: 'building parse trees'! addChildrenInitial: anOrderedCollection anOrderedCollection size = 2 ifTrue: [self firstChar: anOrderedCollection removeFirst. self lastChar: anOrderedCollection removeFirst] ifFalse: [self error: 'wrong number of children']! ! !CharRangeNode methodsFor: 'printing'! printOn: aStream self firstChar printOn: aStream. aStream nextPut: $-. self lastChar printOn: aStream! ! !CharRangeNode methodsFor: 'private'! makeCharNodeFor: aChar "Answer a new CharacterNode for aChar." ^self characterNodeClass charSpec: (String with: aChar)! ! !CharRangeNode methodsFor: 'converting'! addCharsTo: aCollection "Add each character in my range to aCollection." self firstChar asInteger to: self lastChar asInteger do: [:ascii | aCollection add: (Character value: ascii)]! addPureCharNodesTo: childNodes "Add CharacterNodes for each character in my range to childNodes." self firstChar asInteger to: self lastChar asInteger do: [:ascii | childNodes add: (self makeCharNodeFor: (Character value: ascii))]! ! RegularExpressionNode subclass: #UnaryRegExprNode instanceVariableNames: 'onlyChild ' classVariableNames: '' poolDictionaries: '' category: 'T-gen-Regular Expression Nodes'! UnaryRegExprNode comment: '================================================= Copyright (c) 1992 by Justin O. Graver. All rights reserved (with exceptions). For complete information evaluate "Object tgenCopyright." ================================================= I am a unary regular expression.'! !UnaryRegExprNode methodsFor: 'state accessing'! onlyChild ^onlyChild! onlyChild: argument onlyChild := argument! ! !UnaryRegExprNode methodsFor: 'traversing'! childrenDo: aBlock "Evaluate aBlock for each of my children." aBlock value: self onlyChild! updateChildrenUsing: aBlock "Replace my children according to the value of aBlock." self onlyChild: (aBlock value: self onlyChild)! ! !UnaryRegExprNode methodsFor: 'building parse trees'! addChildrenInitial: anOrderedCollection anOrderedCollection size = 1 ifTrue: [self onlyChild: anOrderedCollection removeFirst] ifFalse: [self error: 'wrong number of children']! ! !UnaryRegExprNode methodsFor: 'transforming'! needsTransforming ^true! ! "-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- "! UnaryRegExprNode class instanceVariableNames: ''! !UnaryRegExprNode class methodsFor: 'instance creation'! onlyChild: arg1 | newMe | newMe := self new. newMe onlyChild: arg1. ^newMe! ! UnaryRegExprNode subclass: #OptionalNode instanceVariableNames: '' classVariableNames: '' poolDictionaries: '' category: 'T-gen-Regular Expression Nodes'! OptionalNode comment: '================================================= Copyright (c) 1992 by Justin O. Graver. All rights reserved (with exceptions). For complete information evaluate "Object tgenCopyright." ================================================= I represent the regular expression ''{expr}'' which denotes ''expr | ''.'! !OptionalNode methodsFor: 'printing'! printOn: aStream aStream nextPut: $(. self onlyChild printOn: aStream. aStream nextPutAll: ')?'! ! !OptionalNode methodsFor: 'converting'! asPureRegExpr "Answer a new version of the receiver consisting of only characters, concatenations, alternations, and (star) closures." ^self alternationNodeClass children: (OrderedChildren with: self onlyChild asPureRegExpr with: self epsilonNodeClass new)! ! !OptionalNode methodsFor: 'transforming'! transformUsing: lhsNames withLHS: lhsNode alpha: alphaNodes gamma: gammaNodes "Transform productions of the form 'A -> alpha beta? gamma' into 'A -> alpha B', 'B -> beta B', and 'B -> gamma'." | newProds newLHS | newProds := OrderedCollection new. newLHS := self newNonterminalFrom: lhsNode excluding: lhsNames. newProds add: (self createNewProductionWithLHS: lhsNode andRHS: (alphaNodes addLast: newLHS; yourself)). newProds add: (self createNewProductionWithLHS: newLHS andRHS: ((gammaNodes copy) addFirst: self onlyChild; yourself)). newProds add: (self createNewProductionWithLHS: newLHS andRHS: gammaNodes copy). ^newProds! ! BinaryRegExprNode subclass: #ListNode instanceVariableNames: '' classVariableNames: '' poolDictionaries: '' category: 'T-gen-Regular Expression Nodes'! ListNode comment: '================================================= Copyright (c) 1992 by Justin O. Graver. All rights reserved (with exceptions). For complete information evaluate "Object tgenCopyright." ================================================= I am an instance of the regular expression ''a list b'' which is equivalent to a(ba)*.'! !ListNode methodsFor: 'printing'! printOn: aStream aStream nextPut: $(. self leftChild printOn: aStream. aStream nextPutAll: ' ^ '. self rightChild printOn: aStream. aStream nextPut: $)! ! !ListNode methodsFor: 'transforming'! needsTransforming ^true! transformUsing: lhsNames withLHS: lhsNode alpha: alphaNodes gamma: gammaNodes "Transform productions of the form 'A -> alpha beta1 list beta2 gamma' into 'A -> alpha B', 'B -> beta1 gamma', and 'B -> beta1 beta2 B'." | newProds newLHS | newProds := OrderedCollection new. newLHS := self newNonterminalFrom: lhsNode excluding: lhsNames. newProds add: (self createNewProductionWithLHS: lhsNode andRHS: (alphaNodes addLast: newLHS; yourself)). newProds add: (self createNewProductionWithLHS: newLHS andRHS: (gammaNodes addFirst: self leftChild copy; yourself)). newProds add: (self createNewProductionWithLHS: newLHS andRHS: (OrderedCollection with: self leftChild copy with: self rightChild copy with: newLHS)). ^newProds! ! RegularExpressionNode subclass: #CharacterNode instanceVariableNames: 'charSpec ' classVariableNames: '' poolDictionaries: '' category: 'T-gen-Regular Expression Nodes'! CharacterNode comment: '================================================= Copyright (c) 1992 by Justin O. Graver. All rights reserved (with exceptions). For complete information evaluate "Object tgenCopyright." ================================================= I am a atomic character of a regular expression. Instance Variables: charSpec - contains the character atom specification for this node (different types of specifications are represented by subclasses).'! !CharacterNode methodsFor: 'state accessing'! charSpec ^charSpec! charSpec: argument charSpec := argument! ! !CharacterNode methodsFor: 'building parse trees'! setAttribute: value self charSpec: value! ! !CharacterNode methodsFor: 'printing'! printOn: aStream self charSpec printOn: aStream! ! !CharacterNode methodsFor: 'traversing'! addNonemptyLeavesTo: aSet aSet add: self myChar! ! !CharacterNode methodsFor: 'converting'! addCharsTo: aCollection "Add each character in my range to aCollection." aCollection add: self myChar! addPureCharNodesTo: childNodes childNodes add: self! asFSAStartingAt: startState endingAt: finalState startState goto: finalState on: self myChar! ! !CharacterNode methodsFor: 'accessing'! asInteger ^self myChar asInteger! myChar "Answer the Character represented by the receiver." self charSpec size = 1 ifTrue: [^self charSpec first] ifFalse: [self error: 'Only single character regular expressions atoms are currently supported.']! ! "-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- "! CharacterNode class instanceVariableNames: ''! !CharacterNode class methodsFor: 'instance creation'! charSpec: arg1 | newMe | newMe := self new. newMe charSpec: arg1. ^newMe! ! CharacterNode subclass: #HexadecimalCharNode instanceVariableNames: '' classVariableNames: '' poolDictionaries: '' category: 'T-gen-Regular Expression Nodes'! HexadecimalCharNode comment: '================================================= Copyright (c) 1992 by Justin O. Graver. All rights reserved (with exceptions). For complete information evaluate "Object tgenCopyright." ================================================= I am a atomic character of a regular expression. I am specified by a String of the form ''\xHH'' where each H is a hexadecimal digit (0-9, a-f or A-F) and HH is my corresponding ASCII value. '! !HexadecimalCharNode methodsFor: 'accessing'! myChar "Answer the Character represented by the receiver. The spec is of the form '\xHH'." | spec | spec := self charSpec. (spec size = 4 and: [spec first = $\ and: [(spec at: 2) = $x]]) ifTrue: [^Character value: ('16r' , (spec copyFrom: 3 to: 4)) asNumber] ifFalse: [self error: 'Hexadecimal character specifications must be of the form ''\xHH''.']! ! CharacterNode subclass: #OctalCharNode instanceVariableNames: '' classVariableNames: '' poolDictionaries: '' category: 'T-gen-Regular Expression Nodes'! OctalCharNode comment: '================================================= Copyright (c) 1992 by Justin O. Graver. All rights reserved (with exceptions). For complete information evaluate "Object tgenCopyright." ================================================= I am a atomic character of a regular expression. I am specified by a String of the form ''\oOOO'' where each O is a octal digit (0-7) and OOO is my corresponding ASCII value. '! !OctalCharNode methodsFor: 'accessing'! myChar "Answer the Character represented by the receiver. The spec is of the form '\oOOO'." | spec | spec := self charSpec. (spec size = 5 and: [spec first = $\ and: [(spec at: 2) = $o]]) ifTrue: [^Character value: ('8r' , (spec copyFrom: 3 to: 5)) asNumber] ifFalse: [self error: 'Octal character specifications must be of the form ''\oOOO''.']! ! CharacterNode subclass: #DecimalCharNode instanceVariableNames: '' classVariableNames: '' poolDictionaries: '' category: 'T-gen-Regular Expression Nodes'! DecimalCharNode comment: '================================================= Copyright (c) 1992 by Justin O. Graver. All rights reserved (with exceptions). For complete information evaluate "Object tgenCopyright." ================================================= I am a atomic character of a regular expression. I am specified by a String of the form ''\ddd'' where each d is a decimal digit and ddd is my corresponding ASCII value. '! !DecimalCharNode methodsFor: 'accessing'! myChar "Answer the Character represented by the receiver. The spec is of the form '\ddd'." | spec | spec := self charSpec. (spec size = 4 and: [spec first = $\]) ifTrue: [^Character value: (spec copyFrom: 2 to: 4) asNumber] ifFalse: [self error: 'Decimal character specifications must be of the form ''\ddd''.']! ! CharacterNode subclass: #EscapedCharNode instanceVariableNames: '' classVariableNames: 'SpecialCharMap ' poolDictionaries: '' category: 'T-gen-Regular Expression Nodes'! EscapedCharNode comment: '================================================= Copyright (c) 1992 by Justin O. Graver. All rights reserved (with exceptions). For complete information evaluate "Object tgenCopyright." ================================================= I am a atomic character of a regular expression. I am specified by a String of the form ''\c'' where each c is some character. Normally, the character I represent is just c, however, some specifications have special meanings (see comment in class initialization method).'! !EscapedCharNode methodsFor: 'accessing'! myChar "Answer the Character represented by the receiver. The spec is of the form '\c' where some c's are special (see class comment)." | spec char | spec := self charSpec. (spec size = 2 and: [spec first = $\]) ifTrue: [char := spec last. ^self specialChars at: char ifAbsent: [char]] ifFalse: [self error: 'Escaped character specifications must be of the form ''\c''.']! specialChars ^SpecialCharMap! ! "-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- "! EscapedCharNode class instanceVariableNames: ''! !EscapedCharNode class methodsFor: 'initialization'! initialize "The following characters are special: spec ascii character ---- ---- -------- \0 0 null \b 8 backspace \t 9 horizontal tab \n 10 linefeed (UNIX newline \n) \f 12 form feed \r 13 carriage return (Smalltalk cr) \e 27 escape \s 32 space \d 127 delete" "EscapedCharNode initialize" | dict | dict := Dictionary new. dict at: $0 put: (Character value: 0); at: $b put: (Character value: 8); at: $t put: (Character value: 9); at: $n put: (Character value: 10); at: $f put: (Character value: 12); at: $r put: (Character value: 13); at: $e put: (Character value: 27); at: $s put: (Character value: 32); at: $d put: (Character value: 127). SpecialCharMap := dict! ! RegularExpressionNode subclass: #EnnaryRegExprNode instanceVariableNames: 'children ' classVariableNames: '' poolDictionaries: '' category: 'T-gen-Regular Expression Nodes'! EnnaryRegExprNode comment: '================================================= Copyright (c) 1992 by Justin O. Graver. All rights reserved (with exceptions). For complete information evaluate "Object tgenCopyright." ================================================= I represent an n-ary regular expression.'! !EnnaryRegExprNode methodsFor: 'state accessing'! children ^children! children: argument children := argument! ! !EnnaryRegExprNode methodsFor: 'traversing'! childrenDo: aBlock "Evaluate aBlock for each of my children." self children do: aBlock! updateChildrenUsing: aBlock "Replace my children according to the value of aBlock." self children: (self children collect: aBlock)! ! !EnnaryRegExprNode methodsFor: 'building parse trees'! addChildrenFirst: anOrderedCollection self children addAllFirst: anOrderedCollection! addChildrenInitial: anOrderedCollection self children addAll: anOrderedCollection! ! !EnnaryRegExprNode methodsFor: 'initialization'! init self children: OrderedCollection new! ! !EnnaryRegExprNode methodsFor: 'converting'! asPureRegExpr "Answer a new version of the receiver consisting of only characters, concatenations, alternations, and (star) closures. Also, eliminate single child alternations and concatenations." | newKids | self children size = 1 ifTrue: [^self children first asPureRegExpr]. newKids := OrderedChildren new. self childrenDo: [:child | newKids add: child asPureRegExpr]. ^self species children: newKids! ! "-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- "! EnnaryRegExprNode class instanceVariableNames: ''! !EnnaryRegExprNode class methodsFor: 'instance creation'! children: arg1 | newMe | newMe := super new. newMe children: arg1. ^newMe! new ^super new init! ! EnnaryRegExprNode subclass: #ConcatenationNode instanceVariableNames: '' classVariableNames: '' poolDictionaries: '' category: 'T-gen-Regular Expression Nodes'! ConcatenationNode comment: '================================================= Copyright (c) 1992 by Justin O. Graver. All rights reserved (with exceptions). For complete information evaluate "Object tgenCopyright." ================================================= I represent the concatenation of two or more regular expressions, i.e. child1 child2 ... childN.'! !ConcatenationNode methodsFor: 'printing'! printOn: aStream self children do: [:child | child printOn: aStream. child == self children last ifFalse: [aStream space]]! ! !ConcatenationNode methodsFor: 'converting'! asFSAStartingAt: startState endingAt: finalState | prevState newState | prevState := startState. self children do: [:child | child == self children last ifTrue: [child asFSAStartingAt: prevState endingAt: finalState] ifFalse: [newState := self fsaStateClass new. child asFSAStartingAt: prevState endingAt: newState. prevState := newState]]! ! !ConcatenationNode methodsFor: 'transforming'! transformUsing: lhsNames withLHS: lhsNode alpha: alphaNodes gamma: gammaNodes "Transform productions of the form 'A -> alpha (beta)op gamma'. Transform AlternationNodes at the last possible moment to minimize the number of new nonterminals created. Transform all consequitive operator nodes right to left to avoid unnecessary 'tail-splitting', which increases the grammar size." | node | node := self children reverseDetect: [:child | child needsTransforming] ifNone: [self children reverseDetect: [:child | child isAlternationNode] ifNone: [^OrderedCollection new]]. alphaNodes addAllLast: self children. self children reverseDo: [:child | child == node ifTrue: [alphaNodes removeLast. ^node transformUsing: lhsNames withLHS: lhsNode alpha: alphaNodes gamma: gammaNodes] ifFalse: [gammaNodes addFirst: alphaNodes removeLast]]. self error: 'Control should never reach this point.'! ! !ConcatenationNode methodsFor: 'testing'! isConcatenationNode ^true! ! !ConcatenationNode methodsFor: 'collecting'! collectSymbol ^self children collect: [:sym | sym asGrammarSymbol]! ! !ConcatenationNode methodsFor: 'building parse trees'! concatenateWith: node ^node concatenateWithCatNode: self! concatenateWithCatNode: node node children addAllLast: children. ^node! concatenateWithNonCatNode: node self children addFirst: node. ^self! ! EnnaryRegExprNode subclass: #AlternationNode instanceVariableNames: '' classVariableNames: '' poolDictionaries: '' category: 'T-gen-Regular Expression Nodes'! AlternationNode comment: '================================================= Copyright (c) 1992 by Justin O. Graver. All rights reserved (with exceptions). For complete information evaluate "Object tgenCopyright." ================================================= I represent the alternation of two or more regular expressions, i.e. child1 | child2 | ... | childN.'! !AlternationNode methodsFor: 'printing'! printOn: aStream aStream nextPut: $(. self children do: [:child | child printOn: aStream. child == self children last ifFalse: [aStream nextPutAll: ' | ']]. aStream nextPut: $)! ! !AlternationNode methodsFor: 'converting'! asFSAStartingAt: startState endingAt: finalState self children do: [:child | child asFSAStartingAt: startState endingAt: finalState]! ! !AlternationNode methodsFor: 'transforming'! transformUsing: lhsNames withLHS: lhsNode "I am a top-level node (e.g. A : B | C | D). Break-up the right-hand-side into separate productions and answer a collection of these productions." | newProds | newProds := OrderedCollection new. self childrenDo: [:child | newProds add: (self productionNodeClass leftHandSide: lhsNode rightHandSides: child)]. ^newProds! transformUsing: lhsNames withLHS: lhsNode alpha: alphaNodes gamma: gammaNodes "Transform productions of the form 'A -> alpha ( beta1 | beta2 ) gamma' into 'A -> alpha beta1 gamma' and 'A -> alpha beta2 gamma'." | newProds | newProds := OrderedCollection new. self childrenDo: [:beta | newProds add: (self createNewProductionWithLHS: lhsNode andRHS: ((OrderedCollection new) addAllLast: alphaNodes copy; addLast: beta; addAllLast: gammaNodes copy; yourself))]. ^newProds! ! !AlternationNode methodsFor: 'testing'! isAlternationNode ^true! ! !AlternationNode methodsFor: 'private'! processTransformation: lhs | newNode prods | prods := OrderedCollection new. children do: [:node | newNode := RRPGProductionNode new. newNode leftHandSide: lhs. newNode rightHandSides: (RRPGRightHandSideNode new symbols: node). prods add: newNode]. ^prods! ! !AlternationNode methodsFor: 'building parse trees'! alternateWith: node ^node alternateWithAltNode: self! alternateWithAltNode: node node children addAllLast: children. ^node! alternateWithNonAltNode: node self children addFirst: node. ^self! ! EnnaryRegExprNode subclass: #AlternationRangeNode instanceVariableNames: '' classVariableNames: '' poolDictionaries: '' category: 'T-gen-Regular Expression Nodes'! AlternationRangeNode comment: '================================================= Copyright (c) 1992 by Justin O. Graver. All rights reserved (with exceptions). For complete information evaluate "Object tgenCopyright." ================================================= I represent the alternation of two or more atomic regular expressions. My specification encorporates the use of character ranges, e.g. [a-z] (= a | b | c | ... | z). My children are either simple characters or character ranges, e.g. [a-z0-9!!@#] or [aeiou] (vowels).'! !AlternationRangeNode methodsFor: 'printing'! printOn: aStream aStream nextPut: $[. self children do: [:child | child printOn: aStream]. aStream nextPut: $]! ! !AlternationRangeNode methodsFor: 'building parse trees'! addChildrenInitial: anOrderedCollection anOrderedCollection size = 1 ifTrue: [self children: anOrderedCollection removeFirst] ifFalse: [self error: 'wrong number of children']! ! !AlternationRangeNode methodsFor: 'converting'! asPureRegExpr "Answer a new version of the receiver consisting of only characters, concatenations, alternations, and (star) closures. Also, eliminate single child alternations and concatenations." | kids | kids := OrderedChildren new. self characters do: [:char | kids add: (self makeCharNodeFor: char)]. ^self alternationNodeClass children: kids! ! !AlternationRangeNode methodsFor: 'accessing'! characters "Answer the Set of Characters I represent." | chars | chars := Set new. self childrenDo: [:child | child addCharsTo: chars]. ^chars! ! !AlternationRangeNode methodsFor: 'private'! makeCharNodeFor: aChar "Answer a new CharacterNode for aChar." ^self characterNodeClass charSpec: (String with: aChar)! ! AlternationRangeNode subclass: #ComplementedAlternationRangeNode instanceVariableNames: '' classVariableNames: '' poolDictionaries: '' category: 'T-gen-Regular Expression Nodes'! ComplementedAlternationRangeNode comment: '================================================= Copyright (c) 1992 by Justin O. Graver. All rights reserved (with exceptions). For complete information evaluate "Object tgenCopyright." ================================================= I represent the alternation of two or more atomic regular expressions (specified as the complement of an alternation range). I take the complement of the universe of printable characters only, e.g. ~[aeiou] (all non-vowel printable characters).'! !ComplementedAlternationRangeNode methodsFor: 'printing'! printOn: aStream aStream nextPut: $~. super printOn: aStream! ! !ComplementedAlternationRangeNode methodsFor: 'accessing'! characters "Answer the Set of Characters I represent." | chars | chars := self characterUniverse. chars removeAll: super characters. ^chars! characterUniverse "Answer a collection of printable characters." ^((32 to: 126) collect: [:ea | Character value: ea]) asSet! ! UnaryRegExprNode subclass: #StarClosureNode instanceVariableNames: '' classVariableNames: '' poolDictionaries: '' category: 'T-gen-Regular Expression Nodes'! StarClosureNode comment: '================================================= Copyright (c) 1992 by Justin O. Graver. All rights reserved (with exceptions). For complete information evaluate "Object tgenCopyright." ================================================= I represent the regular expression ''(expr)*'' which denotes zero or more repetitions of ''expr''.'! !StarClosureNode methodsFor: 'printing'! printOn: aStream aStream nextPut: $(. self onlyChild printOn: aStream. aStream nextPutAll: ')*'! ! !StarClosureNode methodsFor: 'converting'! asFSAStartingAt: startState endingAt: finalState | middleState | middleState := self fsaStateClass new. startState goto: middleState on: self epsilon. middleState goto: finalState on: self epsilon. self onlyChild asFSAStartingAt: middleState endingAt: middleState! asPureRegExpr "Answer a new version of the receiver consisting of only characters, concatenations, alternations, and (star) closures." ^self starClosureNodeClass onlyChild: self onlyChild asPureRegExpr! ! !StarClosureNode methodsFor: 'transforming'! transformUsing: lhsNames withLHS: lhsNode alpha: alphaNodes gamma: gammaNodes "Transform productions of the form 'A -> alpha beta* gamma' into 'A -> alpha B', 'B -> beta B', and 'B -> gamma'." | newProds newLHS | newProds := OrderedCollection new. newLHS := self newNonterminalFrom: lhsNode excluding: lhsNames. newProds add: (self createNewProductionWithLHS: lhsNode andRHS: (alphaNodes addLast: newLHS; yourself)). newProds add: (self createNewProductionWithLHS: newLHS andRHS: (OrderedCollection with: self onlyChild with: newLHS)). newProds add: (self createNewProductionWithLHS: newLHS andRHS: gammaNodes). ^newProds! ! UnaryRegExprNode subclass: #PlusClosureNode instanceVariableNames: '' classVariableNames: '' poolDictionaries: '' category: 'T-gen-Regular Expression Nodes'! PlusClosureNode comment: '================================================= Copyright (c) 1992 by Justin O. Graver. All rights reserved (with exceptions). For complete information evaluate "Object tgenCopyright." ================================================= I represent the regular expression ''(expr)+'' which denotes ''expr (expr)*''.'! !PlusClosureNode methodsFor: 'printing'! printOn: aStream aStream nextPut: $(. self onlyChild printOn: aStream. aStream nextPutAll: ')+'! ! !PlusClosureNode methodsFor: 'converting'! asPureRegExpr "Answer a new version of the receiver consisting of only characters, concatenations, alternations, and (star) closures." ^self concatenationNodeClass children: (OrderedChildren with: self onlyChild asPureRegExpr with: (self starClosureNodeClass onlyChild: self onlyChild asPureRegExpr))! ! !PlusClosureNode methodsFor: 'transforming'! transformUsing: lhsNames withLHS: lhsNode alpha: alphaNodes gamma: gammaNodes "Transform productions of the form 'A -> alpha beta+ gamma' into 'A -> alpha B', 'B -> beta B', and 'B -> beta gamma'." | newProds newLHS | newProds := OrderedCollection new. newLHS := self newNonterminalFrom: lhsNode excluding: lhsNames. newProds add: (self createNewProductionWithLHS: lhsNode andRHS: (alphaNodes addLast: newLHS; yourself)). newProds add: (self createNewProductionWithLHS: newLHS andRHS: (OrderedCollection with: self onlyChild copy with: newLHS)). newProds add: (self createNewProductionWithLHS: newLHS andRHS: (gammaNodes addFirst: self onlyChild copy; yourself)). ^newProds! ! EscapedCharNode initialize!