// Gardens Point Parser Generator
// Copyright (c) Wayne Kelly, QUT 2005-2014
// (see accompanying GPPGcopyright.rtf)

using System;
using System.Text;
using System.Globalization;
using System.Collections.Generic;
using System.Runtime.Serialization;
using System.Diagnostics.CodeAnalysis;

namespace QUT.Gppg
{
    /// <summary>
    /// Abstract class for GPPG shift-reduce parsers.
    /// Parsers generated by GPPG derive from this base
    /// class, overriding the abstract Initialize() and
    /// DoAction() methods.
    /// </summary>
    /// <typeparam name="TValue">Semantic value type</typeparam>
    /// <typeparam name="TSpan">Location type</typeparam>
#if EXPORT_GPPG
    public abstract class ShiftReduceParser<TValue, TSpan>
#else
    internal abstract class ShiftReduceParser<TValue, TSpan>
#endif
        where TSpan : IMerge<TSpan>, new()
    {
        private AbstractScanner<TValue, TSpan> scanner;

        /// <summary>
        /// The abstract scanner for this parser.
        /// </summary>
        protected AbstractScanner<TValue, TSpan> Scanner
        {
            get { return scanner; }
            set { scanner = value; }
        }

        /// <summary>
        /// Constructor for base class
        /// </summary>
        /// <param name="scanner">Scanner instance for this parser</param>
        protected ShiftReduceParser( AbstractScanner<TValue, TSpan> scanner )
        {
            this.scanner = scanner;
        }

        // ==============================================================
        //                    TECHNICAL EXPLANATION.
        //   Why the next two fields are not exposed via properties.
        // ==============================================================
        // These fields are of the generic parameter types, and are
        // frequently instantiated as struct types in derived classes.
        // Semantic actions are defined in the derived classes and refer
        // to instance fields of these structs.  In such cases the code
        // "get_CurrentSemanticValue().myField = blah;" will fail since
        // the getter pushes the value of the field, not the reference.
        // So, in the presence of properties, gppg would need to encode
        // such field accesses as ...
        //  "tmp = get_CurrentSemanticValue(); // Fetch value
        //   tmp.myField = blah;               // update
        //   set_CurrentSemanticValue(tmp); "  // Write update back.
        // There is no issue if TValue is restricted to be a ref type.
        // The same explanation applies to scanner.yylval.
        // ==============================================================

        /// <summary>
        /// The current value of the "$$" symbolic variable in the parser
        /// </summary>
        [SuppressMessage( "Microsoft.Design", "CA1051:DoNotDeclareVisibleInstanceFields" )]
        protected TValue CurrentSemanticValue;

        /// <summary>
        /// The current value of the "@$" symbolic variable in the parser
        /// </summary>
        [SuppressMessage( "Microsoft.Design", "CA1051:DoNotDeclareVisibleInstanceFields" )]
        protected TSpan CurrentLocationSpan;

        /// <summary>
        /// The current lookahead token. Zero means that no lookahead
        /// is held and the next token must be fetched from the scanner.
        /// </summary>
        protected int NextToken;

        private TSpan LastSpan;
        private State FsaState;
        private bool recovering;
        private int tokensSinceLastError;

        private PushdownPrefixState<State> StateStack = new PushdownPrefixState<State>();
        private PushdownPrefixState<TValue> valueStack = new PushdownPrefixState<TValue>();
        private PushdownPrefixState<TSpan> locationStack = new PushdownPrefixState<TSpan>();

        /// <summary>
        /// The stack of semantic value (YYSTYPE) values.
        /// </summary>
        protected PushdownPrefixState<TValue> ValueStack { get { return valueStack; } }

        /// <summary>
        /// The stack of location value (YYLTYPE) values.
        /// </summary>
        protected PushdownPrefixState<TSpan> LocationStack { get { return locationStack; } }

        private int errorToken;
        private int endOfFileToken;
        private string[] nonTerminals;
        private State[] states;
        private Rule[] rules;

        /// <summary>
        /// Initialization method to allow derived classes
        /// to insert the rule list into this base class.
        /// </summary>
        /// <param name="rules">The array of Rule objects</param>
        protected void InitRules( Rule[] rules ) { this.rules = rules; }

        /// <summary>
        /// Initialization method to allow derived classes to
        /// insert the states table into this base class.
        /// </summary>
        /// <param name="states">The pre-initialized states table</param>
        protected void InitStates( State[] states ) { this.states = states; }

        /// <summary>
        /// OBSOLETE FOR VERSION 1.4.0
        /// Allocates an empty states table of the given size.
        /// </summary>
        /// <param name="size">Number of entries in the states table</param>
        protected void InitStateTable( int size ) { states = new State[size]; }

        /// <summary>
        /// Initialization method to allow derived classes
        /// to insert the special value for the error and EOF tokens.
        /// </summary>
        /// <param name="err">The error token ordinal</param>
        /// <param name="end">The EOF token ordinal</param>
        protected void InitSpecialTokens( int err, int end )
        {
            errorToken = err;
            endOfFileToken = end;
        }

        /// <summary>
        /// Initialization method to allow derived classes to
        /// insert the non-terminal symbol names into this base class.
        /// </summary>
        /// <param name="names">Non-terminal symbol names</param>
        protected void InitNonTerminals( string[] names ) { nonTerminals = names; }

        #region YYAbort, YYAccept etcetera.
        [Serializable]
        [SuppressMessage( "Microsoft.Design", "CA1064:ExceptionsShouldBePublic" )]
        // Reason for FxCop message suppression -
        // This exception cannot escape from the local context
        private class AcceptException : Exception
        {
            internal AcceptException() { }
            protected AcceptException( SerializationInfo i, StreamingContext c ) : base( i, c ) { }
        }

        [Serializable]
        [SuppressMessage( "Microsoft.Design", "CA1064:ExceptionsShouldBePublic" )]
        // Reason for FxCop message suppression -
        // This exception cannot escape from the local context
        private class AbortException : Exception
        {
            internal AbortException() { }
            protected AbortException( SerializationInfo i, StreamingContext c ) : base( i, c ) { }
        }

        [Serializable]
        [SuppressMessage( "Microsoft.Design", "CA1064:ExceptionsShouldBePublic" )]
        // Reason for FxCop message suppression -
        // This exception cannot escape from the local context
        private class ErrorException : Exception
        {
            internal ErrorException() { }
            protected ErrorException( SerializationInfo i, StreamingContext c ) : base( i, c ) { }
        }

        // The following methods are only called from within
        // a semantic action. The thrown exceptions can never
        // propagate outside the ShiftReduceParser class in
        // which they are nested.

        /// <summary>
        /// Force parser to terminate, returning "true"
        /// </summary>
        protected static void YYAccept() { throw new AcceptException(); }

        /// <summary>
        /// Force parser to terminate, returning "false"
        /// </summary>
        protected static void YYAbort() { throw new AbortException(); }

        /// <summary>
        /// Force parser to enter error recovery, terminating and
        /// returning "false" if error recovery fails.
        /// </summary>
        protected static void YYError() { throw new ErrorException(); }

        /// <summary>
        /// Check if parser in error recovery state.
        /// </summary>
        protected bool YYRecovering { get { return recovering; } }
        #endregion

        /// <summary>
        /// Abstract base method. ShiftReduceParser calls this
        /// to initialize the base class data structures.  Concrete
        /// parser classes must override this method.
        /// </summary>
        protected abstract void Initialize();

        /// <summary>
        /// Main entry point of the Shift-Reduce Parser.
        /// </summary>
        /// <returns>True if parse succeeds, else false for
        /// unrecoverable errors</returns>
        public bool Parse()
        {
            Initialize();   // allow derived classes to instantiate rules, states and nonTerminals

            NextToken = 0;
            FsaState = states[0];

            StateStack.Push( FsaState );
            valueStack.Push( CurrentSemanticValue );
            LocationStack.Push( CurrentLocationSpan );

            while (true)
            {
#if TRACE_ACTIONS
                Console.Error.WriteLine("Entering state {0} ", FsaState.number);
                DisplayStack();
#endif
                int action = FsaState.defaultAction;

                if (FsaState.ParserTable != null)
                {
                    if (NextToken == 0)
                    {
                        // We save the last token span, so that the location span
                        // of production right hand sides that begin or end with a
                        // nullable production will be correct.
                        LastSpan = scanner.yylloc;
                        NextToken = scanner.yylex();
#if TRACE_ACTIONS
                        Console.Error.WriteLine( "Reading: Next token is {0}", TerminalToString( NextToken ) );
#endif
                    }
#if TRACE_ACTIONS
                    else
                        Console.Error.WriteLine( "Next token is still {0}", TerminalToString( NextToken ) );
#endif
                    // Single lookup; the default action is kept when the
                    // table has no entry for the lookahead token.
                    int tableAction;
                    if (FsaState.ParserTable.TryGetValue( NextToken, out tableAction ))
                        action = tableAction;
                }

                if (action > 0)         // shift
                {
                    Shift( action );
                }
                else if (action < 0)    // reduce
                {
                    try
                    {
                        Reduce( -action );
                        if (action == -1)   // accept
                            return true;
                    }
                    catch (AbortException)
                    {
                        return false;
                    }
                    catch (AcceptException)
                    {
                        return true;
                    }
                    catch (ErrorException)
                    {
                        // YYError() was called from a semantic action. If
                        // recovery succeeds parsing simply continues; the
                        // exception must not escape from this method.
                        if (!ErrorRecovery())
                            return false;
                    }
                    // Any other exception propagates unchanged to the caller.
                }
                else if (action == 0)   // error
                    if (!ErrorRecovery())
                        return false;
            }
        }

        /// <summary>
        /// Shift the current lookahead token, entering the given state.
        /// </summary>
        /// <param name="stateIndex">Index of the state to enter</param>
        private void Shift( int stateIndex )
        {
#if TRACE_ACTIONS
            Console.Error.Write("Shifting token {0}, ", TerminalToString(NextToken));
#endif
            FsaState = states[stateIndex];

            valueStack.Push( scanner.yylval );
            StateStack.Push( FsaState );
            LocationStack.Push( scanner.yylloc );

            if (recovering)
            {
                if (NextToken != errorToken)
                    tokensSinceLastError++;

                // Leave recovery mode once enough real tokens have been shifted.
                if (tokensSinceLastError > 5)
                    recovering = false;
            }

            // The EOF token is never consumed, every other token is.
            if (NextToken != endOfFileToken)
                NextToken = 0;
        }

        /// <summary>
        /// Reduce by the given rule: compute the default "$$" and "@$"
        /// values, run the semantic action, pop the right hand side
        /// and push the state reached on the left hand side symbol.
        /// </summary>
        /// <param name="ruleNumber">Index of the rule in the rules array</param>
        private void Reduce( int ruleNumber )
        {
#if TRACE_ACTIONS
            DisplayRule(ruleNumber);
#endif
            Rule rule = rules[ruleNumber];
            int rhLen = rule.RightHandSide.Length;
            //
            //  Default actions for unit productions.
            //
            if (rhLen == 1)
            {
                CurrentSemanticValue = valueStack.TopElement();   // Default action: $$ = $1;
                CurrentLocationSpan = LocationStack.TopElement(); // Default action "@$ = @1;
            }
            else if (rhLen == 0)
            {
                // Create a new blank value.
                // Explicit semantic action may mutate this value
                CurrentSemanticValue = default( TValue );
                // The location span for an empty production will start with the
                // beginning of the next lexeme, and end with the finish of the
                // previous lexeme.  This gives the correct behaviour when this
                // nonsense value is used in later Merge operations.
                CurrentLocationSpan = (scanner.yylloc != null && LastSpan != null ?
                    scanner.yylloc.Merge( LastSpan ) :
                    default( TSpan ));
            }
            else
            {
                // Default action: $$ = $1;
                CurrentSemanticValue = valueStack[valueStack.Depth - rhLen];
                //  Default action "@$ = @1.Merge(@N)" for location info.
                TSpan at1 = LocationStack[LocationStack.Depth - rhLen];
                TSpan atN = LocationStack[LocationStack.Depth - 1];
                CurrentLocationSpan =
                    ((at1 != null && atN != null) ? at1.Merge( atN ) : default( TSpan ));
            }

            DoAction( ruleNumber );

            for (int i = 0; i < rhLen; i++)
            {
                StateStack.Pop();
                valueStack.Pop();
                LocationStack.Pop();
            }
            FsaState = StateStack.TopElement();

            int gotoState;
            if (FsaState.Goto.TryGetValue( rule.LeftHandSide, out gotoState ))
                FsaState = states[gotoState];

            StateStack.Push( FsaState );
            valueStack.Push( CurrentSemanticValue );
            LocationStack.Push( CurrentLocationSpan );
        }

        /// <summary>
        /// Execute the selected action from array.
        /// Must be overridden in derived classes.
        /// </summary>
        /// <param name="actionNumber">Index of the action to perform</param>
        protected abstract void DoAction( int actionNumber );

        /// <summary>
        /// Report the error (unless already recovering), unwind to a
        /// state that can shift the error token, shift it, and then
        /// discard input tokens until one is acceptable.
        /// </summary>
        /// <returns>True if parsing can continue, false otherwise</returns>
        private bool ErrorRecovery()
        {
            bool discard;

            if (!recovering) // if not recovering from previous error
                ReportError();

            if (!FindErrorRecoveryState())
                return false;
            //
            //  The interim fix for the "looping in error recovery"
            //  artifact involved moving the setting of the recovering
            //  bool until after invalid tokens have been discarded.
            //
            ShiftErrorToken();
            discard = DiscardInvalidTokens();
            recovering = true;
            tokensSinceLastError = 0;
            return discard;
        }

        /// <summary>
        /// Build a "Syntax error, unexpected X" message, listing the
        /// expected terminals when the current state has fewer than
        /// seven of them, and pass it to the scanner's yyerror.
        /// </summary>
        private void ReportError()
        {
            StringBuilder errorMsg = new StringBuilder();
            errorMsg.AppendFormat( "Syntax error, unexpected {0}", TerminalToString( NextToken ) );

            // ParserTable is null for states that only have a default
            // action; such a state is current when YYError() is called
            // from an action reduced in that state.
            if (FsaState.ParserTable != null && FsaState.ParserTable.Count < 7)
            {
                bool first = true;
                foreach (int terminal in FsaState.ParserTable.Keys)
                {
                    if (first)
                        errorMsg.Append( ", expecting " );
                    else
                        errorMsg.Append( ", or " );

                    errorMsg.Append( TerminalToString( terminal ) );
                    first = false;
                }
            }
            scanner.yyerror( errorMsg.ToString() );
        }

        /// <summary>
        /// Shift the error token in the current state, then
        /// restore the lookahead token that was pending.
        /// </summary>
        private void ShiftErrorToken()
        {
            int old_next = NextToken;
            NextToken = errorToken;

            Shift( FsaState.ParserTable[NextToken] );

#if TRACE_ACTIONS
            Console.Error.WriteLine("Entering state {0} ", FsaState.number);
#endif
            NextToken = old_next;
        }

        /// <summary>
        /// Pop the stacks until the current state can shift the error token.
        /// </summary>
        /// <returns>False if the state stack empties without finding one</returns>
        private bool FindErrorRecoveryState()
        {
            while (true)    // pop states until one found that accepts error token
            {
                int errorAction;
                if (FsaState.ParserTable != null &&
                    FsaState.ParserTable.TryGetValue( errorToken, out errorAction ) &&
                    errorAction > 0) // shift
                    return true;

#if TRACE_ACTIONS
                Console.Error.WriteLine("Error: popping state {0}", StateStack.TopElement().number);
#endif
                StateStack.Pop();
                valueStack.Pop();
                LocationStack.Pop();

#if TRACE_ACTIONS
                DisplayStack();
#endif
                if (StateStack.IsEmpty())
                {
#if TRACE_ACTIONS
                    Console.Error.WriteLine("Aborting: didn't find a state that accepts error token");
#endif
                    return false;
                }
                else
                    FsaState = StateStack.TopElement();
            }
        }

        /// <summary>
        /// Discard input tokens until one is found for which the
        /// current state has an action.
        /// </summary>
        /// <returns>False if end of file is reached first</returns>
        private bool DiscardInvalidTokens()
        {
            int action = FsaState.defaultAction;

            if (FsaState.ParserTable != null)
            {
                // Discard tokens until find one that works ...
                while (true)
                {
                    if (NextToken == 0)
                    {
#if TRACE_ACTIONS
                        Console.Error.Write("Reading a token: ");
#endif
                        NextToken = scanner.yylex();
                    }

#if TRACE_ACTIONS
                    Console.Error.WriteLine("Next token is {0}", TerminalToString(NextToken));
#endif
                    if (NextToken == endOfFileToken)
                        return false;

                    // Keep the previous (default) action if there is no table entry.
                    int tableAction;
                    if (FsaState.ParserTable.TryGetValue( NextToken, out tableAction ))
                        action = tableAction;

                    if (action != 0)
                        return true;
                    else
                    {
#if TRACE_ACTIONS
                        Console.Error.WriteLine("Error: Discarding {0}", TerminalToString(NextToken));
#endif
                        NextToken = 0;
                    }
                }
            }
            else if (recovering && tokensSinceLastError == 0)
            {
                //
                //  Boolean recovering is not set until after the first
                //  error token has been shifted.  Thus if we get back
                //  here with recovering set and no tokens read we are
                //  looping on the same error recovery action.  This
                //  happens if current_state.ParserTable is null because
                //  the state has an LR(0) reduction, but not all
                //  lookahead tokens are valid.  This only occurs for
                //  error productions that *end* on "error".
                //
                //  This action discards tokens one at a time until
                //  the looping stops.  Another attack would be to always
                //  use the LALR(1) table if a production ends on "error"
                //
#if TRACE_ACTIONS
                Console.Error.WriteLine("Error: panic discard of {0}", TerminalToString(NextToken));
#endif
                if (NextToken == endOfFileToken)
                    return false;
                NextToken = 0;
                return true;
            }
            else
                return true;
        }

        /// <summary>
        /// Traditional YACC method.  Discards the next input token.
        /// </summary>
        [SuppressMessage( "Microsoft.Naming", "CA1709:IdentifiersShouldBeCasedCorrectly", MessageId = "yyclearin" )]
        [SuppressMessage( "Microsoft.Naming", "CA1704:IdentifiersShouldBeSpelledCorrectly", MessageId = "yyclearin" )]
        // Reason for FxCop message suppression -
        // This is a traditional name for YACC-like functionality
        protected void yyclearin() { NextToken = 0; }

        /// <summary>
        /// Traditional YACC method. Clear the "recovering" flag.
        /// </summary>
        [SuppressMessage( "Microsoft.Naming", "CA1709:IdentifiersShouldBeCasedCorrectly", MessageId = "yyerrok" )]
        [SuppressMessage( "Microsoft.Naming", "CA1704:IdentifiersShouldBeSpelledCorrectly", MessageId = "yyerrok" )]
        // Reason for FxCop message suppression -
        // This is a traditional name for YACC-like functionality
        protected void yyerrok()
        {
            recovering = false;
        }

        /// <summary>
        /// OBSOLETE FOR VERSION 1.4.0
        /// Method used by derived types to insert new
        /// state instances in the "states" array.
        /// </summary>
        /// <param name="stateNumber">index of the state</param>
        /// <param name="state">data for the state</param>
        protected void AddState( int stateNumber, State state )
        {
            states[stateNumber] = state;
            state.number = stateNumber;
        }

        private void DisplayStack()
        {
            Console.Error.Write( "State stack is now:" );
            for (int i = 0; i < StateStack.Depth; i++)
                Console.Error.Write( " {0}", StateStack[i].number );
            Console.Error.WriteLine();
        }

        private void DisplayRule( int ruleNumber )
        {
            Console.Error.Write( "Reducing stack by rule {0}, ", ruleNumber );
            DisplayProduction( rules[ruleNumber] );
        }

        private void DisplayProduction( Rule rule )
        {
            if (rule.RightHandSide.Length == 0)
                Console.Error.Write( "/* empty */ " );
            else
                foreach (int symbol in rule.RightHandSide)
                    Console.Error.Write( "{0} ", SymbolToString( symbol ) );

            Console.Error.WriteLine( "-> {0}", SymbolToString( rule.LeftHandSide ) );
        }

        /// <summary>
        /// Abstract state class naming terminal symbols.
        /// This is overridden by derived classes with the
        /// name (or alias) to be used in error messages.
        /// </summary>
        /// <param name="terminal">The terminal ordinal</param>
        /// <returns>The name to use for the terminal</returns>
        protected abstract string TerminalToString( int terminal );

        // Negative ordinals denote non-terminals, indexed by (-symbol - 1).
        private string SymbolToString( int symbol )
        {
            if (symbol < 0)
                return nonTerminals[-symbol - 1];
            else
                return TerminalToString( symbol );
        }

        /// <summary>
        /// Return text representation of argument character
        /// </summary>
        /// <param name="input">The character to convert</param>
        /// <returns>String representation of the character</returns>
        protected static string CharToString( char input )
        {
            switch (input)
            {
                case '\a': return @"'\a'";
                case '\b': return @"'\b'";
                case '\f': return @"'\f'";
                case '\n': return @"'\n'";
                case '\r': return @"'\r'";
                case '\t': return @"'\t'";
                case '\v': return @"'\v'";
                case '\0': return @"'\0'";
                default: return string.Format( CultureInfo.InvariantCulture, "'{0}'", input );
            }
        }
    }

    /// <summary>
    /// Classes implementing this interface must supply a
    /// method that merges two location objects to return
    /// a new object of the same type.
    /// GPPG-generated parsers have the default location
    /// action equivalent to "@$ = @1.Merge(@N);" where N
    /// is the right-hand-side length of the production.
    /// </summary>
    /// <typeparam name="TSpan">The Location type</typeparam>
#if EXPORT_GPPG
    public interface IMerge<TSpan>
#else
    internal interface IMerge<TSpan>
#endif
    {
        /// <summary>
        /// Interface method that creates a location object from
        /// the current and last object.  Typically used to create
        /// a location object extending from the start of the @1
        /// object to the end of the @N object.
        /// </summary>
        /// <param name="last">The lexically last object to merge</param>
        /// <returns>The merged location object</returns>
        TSpan Merge( TSpan last );
    }

    /// <summary>
    /// This is the default class that carries location
    /// information from the scanner to the parser.
    /// If you don't declare "%YYLTYPE Foo" the parser
    /// will expect to deal with this type.
    /// </summary>
#if EXPORT_GPPG
    public class LexLocation : IMerge<LexLocation>
#else
    [SuppressMessage( "Microsoft.Performance", "CA1812:AvoidUninstantiatedInternalClasses" )]
    internal class LexLocation : IMerge<LexLocation>
#endif
    {
        private int startLine;   // start line
        private int startColumn; // start column
        private int endLine;     // end line
        private int endColumn;   // end column

        /// <summary>
        /// The line at which the text span starts.
        /// </summary>
        public int StartLine { get { return startLine; } }

        /// <summary>
        /// The column at which the text span starts.
        /// </summary>
        public int StartColumn { get { return startColumn; } }

        /// <summary>
        /// The line on which the text span ends.
        /// </summary>
        public int EndLine { get { return endLine; } }

        /// <summary>
        /// The column of the first character
        /// beyond the end of the text span.
        /// </summary>
        public int EndColumn { get { return endColumn; } }

        /// <summary>
        /// Default no-arg constructor.
        /// </summary>
        public LexLocation()
        { }

        /// <summary>
        /// Constructor for text-span with given start and end.
        /// </summary>
        /// <param name="sl">start line</param>
        /// <param name="sc">start column</param>
        /// <param name="el">end line</param>
        /// <param name="ec">end column</param>
        public LexLocation( int sl, int sc, int el, int ec )
        {
            startLine = sl;
            startColumn = sc;
            endLine = el;
            endColumn = ec;
        }

        /// <summary>
        /// Create a text location which spans from the
        /// start of "this" to the end of the argument "last"
        /// </summary>
        /// <param name="last">The last location in the result span</param>
        /// <returns>The merged span</returns>
        public LexLocation Merge( LexLocation last )
        {
            return new LexLocation( this.startLine, this.startColumn, last.endLine, last.endColumn );
        }
    }

    /// <summary>
    /// Abstract scanner class that GPPG expects its scanners to
    /// extend.
    /// </summary>
    /// <typeparam name="TValue">Semantic value type YYSTYPE</typeparam>
    /// <typeparam name="TSpan">Source location type YYLTYPE</typeparam>
#if EXPORT_GPPG
    public abstract class AbstractScanner<TValue, TSpan>
#else
    internal abstract class AbstractScanner<TValue, TSpan>
#endif
        where TSpan : IMerge<TSpan>
    {
        /// <summary>
        /// Lexical value optionally set by the scanner. The value
        /// is of the %YYSTYPE type declared in the parser spec.
        /// </summary>
        [SuppressMessage( "Microsoft.Design", "CA1051:DoNotDeclareVisibleInstanceFields" )]
        [SuppressMessage( "Microsoft.Naming", "CA1709:IdentifiersShouldBeCasedCorrectly", MessageId = "yylval" )]
        [SuppressMessage( "Microsoft.Naming", "CA1704:IdentifiersShouldBeSpelledCorrectly", MessageId = "yylval" )]
        // Reason for FxCop message suppression -
        // This is a traditional name for YACC-like functionality
        // A field must be declared for this value of parametric type,
        // since it may be instantiated by a value struct.  If it were
        // implemented as a property, machine generated code in derived
        // types would not be able to select on the returned value.
#pragma warning disable 649
        public TValue yylval;                     // Lexical value: set by scanner
#pragma warning restore 649

        /// <summary>
        /// Current scanner location property. The value is of the
        /// type declared by %YYLTYPE in the parser specification.
        /// </summary>
        [SuppressMessage( "Microsoft.Naming", "CA1709:IdentifiersShouldBeCasedCorrectly", MessageId = "yylloc" )]
        [SuppressMessage( "Microsoft.Naming", "CA1704:IdentifiersShouldBeSpelledCorrectly", MessageId = "yylloc" )]
        // Reason for FxCop message suppression -
        // This is a traditional name for YACC-like functionality
        public virtual TSpan yylloc
        {
            get { return default( TSpan ); }     // Empty implementation allowing
            set { /* skip */ }                   // yylloc to be ignored entirely.
        }

        /// <summary>
        /// Main call point for LEX-like scanners.  Returns an int
        /// corresponding to the token recognized by the scanner.
        /// </summary>
        /// <returns>An int corresponding to the token</returns>
        [SuppressMessage( "Microsoft.Naming", "CA1709:IdentifiersShouldBeCasedCorrectly", MessageId = "yylex" )]
        [SuppressMessage( "Microsoft.Naming", "CA1704:IdentifiersShouldBeSpelledCorrectly", MessageId = "yylex" )]
        // Reason for FxCop message suppression -
        // This is a traditional name for YACC-like functionality
        public abstract int yylex();

        /// <summary>
        /// Traditional error reporting provided by LEX-like scanners
        /// to their YACC-like clients. This default implementation
        /// does nothing.
        /// </summary>
        /// <param name="format">Message format string</param>
        /// <param name="args">Optional array of args</param>
        [SuppressMessage( "Microsoft.Naming", "CA1709:IdentifiersShouldBeCasedCorrectly", MessageId = "yyerror" )]
        [SuppressMessage( "Microsoft.Naming", "CA1704:IdentifiersShouldBeSpelledCorrectly", MessageId = "yyerror" )]
        // Reason for FxCop message suppression -
        // This is a traditional name for YACC-like functionality
        public virtual void yyerror( string format, params object[] args ) { }
    }

    /// <summary>
    /// Encapsulated state for the parser.
    /// Opaque to users, visible to the tool-generated code.
    /// </summary>
#if EXPORT_GPPG
    public class State
    {
        /// <summary>
        /// The index of this state in the states array.
        /// </summary>
        public int number;
#else
    internal class State
    {
        /// <summary>
        /// The index of this state in the states array.
        /// </summary>
        internal int number;
#endif
        internal Dictionary<int, int> ParserTable;  // Terminal -> ParseAction
        internal Dictionary<int, int> Goto;         // NonTerminal -> State;
        internal int defaultAction;                 // = 0;  // ParseAction

        /// <summary>
        /// State transition data for this state. Pairs of elements of the
        /// goto array associate symbol ordinals with next state indices.
        /// The actions array is passed to another constructor.
        /// </summary>
        /// <param name="actions">The action list</param>
        /// <param name="goToList">Next state data</param>
        public State( int[] actions, int[] goToList )
            : this( actions )
        {
            Goto = new Dictionary<int, int>();
            for (int i = 0; i < goToList.Length; i += 2)
                Goto.Add( goToList[i], goToList[i + 1] );
        }

        /// <summary>
        /// Action data for this state. Pairs of elements of the
        /// action array associate action ordinals with each of
        /// those symbols that have actions in the current state.
        /// </summary>
        /// <param name="actions">The action array</param>
        public State( int[] actions )
        {
            ParserTable = new Dictionary<int, int>();
            for (int i = 0; i < actions.Length; i += 2)
                ParserTable.Add( actions[i], actions[i + 1] );
        }

        /// <summary>
        /// Set the default action for this state.
        /// </summary>
        /// <param name="defaultAction">Ordinal of the default action</param>
        public State( int defaultAction )
        {
            this.defaultAction = defaultAction;
        }

        /// <summary>
        /// Set the default action and the state transition table.
        /// </summary>
        /// <param name="defaultAction">The default action</param>
        /// <param name="goToList">Transitions from this state</param>
        public State( int defaultAction, int[] goToList )
            : this( defaultAction )
        {
            Goto = new Dictionary<int, int>();
            for (int i = 0; i < goToList.Length; i += 2)
                Goto.Add( goToList[i], goToList[i + 1] );
        }
    }

    /// <summary>
    /// Rule representation at runtime.
    /// </summary>
#if EXPORT_GPPG
    public class Rule
#else
    internal class Rule
#endif
    {
        internal int LeftHandSide;    // symbol
        internal int[] RightHandSide; // symbols

        /// <summary>
        /// Rule constructor.  This holds the ordinal of
        /// the left hand side symbol, and the list of
        /// right hand side symbols, in lexical order.
        /// </summary>
        /// <param name="left">The LHS non-terminal</param>
        /// <param name="right">The RHS symbols, in lexical order</param>
        public Rule( int left, int[] right )
        {
            this.LeftHandSide = left;
            this.RightHandSide = right;
        }
    }

    /// <summary>
    /// Stack utility for the shift-reduce parser.
    /// GPPG parsers have three instances:
    /// (1) The parser state stack, T = QUT.Gppg.State,
    /// (2) The semantic value stack, T = TValue,
    /// (3) The location stack, T = TSpan.
    /// </summary>
    /// <typeparam name="T">The element type of the stack</typeparam>
#if EXPORT_GPPG
    public class PushdownPrefixState<T>
#else
    internal class PushdownPrefixState<T>
#endif
    {
        //  Note that we cannot use the BCL Stack<T> class
        //  here as derived types need to index into stacks.
        //
        private T[] array = new T[8];
        private int tos = 0;

        /// <summary>
        /// Indexer for values of the stack below the top.
        /// </summary>
        /// <param name="index">index of the element, starting from the bottom</param>
        /// <returns>the selected element</returns>
        public T this[int index] { get { return array[index]; } }

        /// <summary>
        /// The current depth of the stack.
        /// </summary>
        public int Depth { get { return tos; } }

        // Push a value, doubling the backing array when it is full.
        internal void Push( T value )
        {
            if (tos >= array.Length)
            {
                T[] newarray = new T[array.Length * 2];
                System.Array.Copy( array, newarray, tos );
                array = newarray;
            }
            array[tos++] = value;
        }

        // Remove and return the top value; the vacated slot is
        // cleared so that the stack does not keep the value alive.
        internal T Pop()
        {
            T rslt = array[--tos];
            array[tos] = default( T );
            return rslt;
        }

        internal T TopElement() { return array[tos - 1]; }

        internal bool IsEmpty() { return tos == 0; }
    }
}