// ==============================================================
//
// This code automatically produced from an embedded resource.
// Do not edit this file, or it will become incompatible with
// the specification from which it was generated.
//
// ==============================================================

using System;
using System.IO;
using System.Text;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Runtime.Serialization;
using System.Globalization;

namespace QUT.GplexBuffers
{
    // Code copied from GPLEX embedded resource

    /// <summary>
    /// Exception thrown by the buffer classes when a request refers to
    /// data that lies outside the range currently held by the buffer.
    /// </summary>
    [Serializable]
    public class BufferException : Exception
    {
        public BufferException() { }
        public BufferException(string message) : base(message) { }
        public BufferException(string message, Exception innerException)
            : base(message, innerException) { }
        protected BufferException(SerializationInfo info, StreamingContext context)
            : base(info, context) { }
    }

    /// <summary>
    /// Abstract base class of all scanner input buffers. A buffer serves
    /// character codes one at a time through <see cref="Read"/>, exposes
    /// its current position through <see cref="Pos"/>, and can return the
    /// text lying between two positions through <see cref="GetString"/>.
    /// </summary>
    public abstract class ScanBuff
    {
        private string fileNm;

        /// <summary>Value returned by <see cref="Read"/> at end of input.</summary>
        public const int EndOfFile = -1;
        public const int UnicodeReplacementChar = 0xFFFD;

        /// <summary>True when a file name has been associated with this buffer.</summary>
        public bool IsFile { get { return (fileNm != null); } }
        public string FileName { get { return fileNm; } set { fileNm = value; } }

        public abstract int Pos { get; set; }
        public abstract int Read();
        public virtual void Mark() { }

        public abstract string GetString(int begin, int limit);

        /// <summary>Creates a buffer that reads from a single string.</summary>
        public static ScanBuff GetBuffer(string source)
        {
            return new StringBuffer(source);
        }

        /// <summary>
        /// Creates a buffer that reads from a list of strings, one string
        /// per source line (without the trailing '\n').
        /// </summary>
        public static ScanBuff GetBuffer(IList<string> source)
        {
            return new LineBuffer(source);
        }

#if (!NOFILES)
        /// <summary>Creates a buffer that reads the stream as raw bytes.</summary>
        public static ScanBuff GetBuffer(Stream source)
        {
            return new BuildBuffer(source);
        }

#if (!BYTEMODE)
        /// <summary>
        /// Creates a buffer that decodes the stream, using
        /// <paramref name="fallbackCodePage"/> when the stream does not
        /// start with a recognized byte-order mark.
        /// </summary>
        public static ScanBuff GetBuffer(Stream source, int fallbackCodePage)
        {
            return new BuildBuffer(source, fallbackCodePage);
        }
#endif // !BYTEMODE
#endif // !NOFILES
    }

    #region Buffer classes

    // ==============================================================
    // =====  Definitions for various ScanBuff derived classes   ====
    // ==============================================================
    // ===============         String input          ================
    // ==============================================================

    /// <summary>
    /// This class reads characters from a single string as
    /// required, for example, by Visual Studio language services
    /// </summary>
    sealed class StringBuffer : ScanBuff
    {
        string str;        // input buffer
        int bPos;          // current position in buffer
        int sLen;

        public StringBuffer(string source)
        {
            this.str = source;
            this.sLen = source.Length;
            this.FileName = null;
        }

        public override int Read()
        {
            if (bPos < sLen) return str[bPos++];
            else if (bPos == sLen) { bPos++; return '\n'; }   // one strike, see new line
            else { bPos++; return EndOfFile; }                // two strikes and you're out!
        }

        public override string GetString(int begin, int limit)
        {
            //  "limit" can be greater than sLen with the BABEL
            //  option set.  Read returns a "virtual" EOL if
            //  an attempt is made to read past the end of the
            //  string buffer.  Without the guard any attempt
            //  to fetch yytext for a token that includes the
            //  EOL will throw an index exception.
            if (limit > sLen) limit = sLen;
            if (limit <= begin) return "";
            else return str.Substring(begin, limit - begin);
        }

        public override int Pos
        {
            get { return bPos; }
            set { bPos = value; }
        }

        public override string ToString() { return "StringBuffer"; }
    }

    // ==============================================================
    //  The LineBuff class contributed by Nigel Horspool,
    //  nigelh@cs.uvic.cs
    // ==============================================================

    /// <summary>
    /// Reads characters from a list of source lines. A '\n' is served
    /// after the characters of each line, so positions are counted as
    /// if every line were terminated by a single newline character.
    /// </summary>
    sealed class LineBuffer : ScanBuff
    {
        IList<string> line;    // list of source lines from a file
        int numLines;          // number of strings in line list
        string curLine;        // current line in that list
        int cLine;             // index of current line in the list
        int curLen;            // length of current line
        int curLineStart;      // position of line start in whole file
        int curLineEnd;        // position of line end in whole file
        int maxPos;            // max position ever visited in whole file
        int cPos;              // ordinal number of code in source

        // Constructed from a list of strings, one per source line.
        // The lines have had trailing '\n' characters removed.
        public LineBuffer(IList<string> lineList)
        {
            line = lineList;
            numLines = line.Count;
            cPos = curLineStart = 0;
            curLine = (numLines > 0 ? line[0] : "");
            maxPos = curLineEnd = curLen = curLine.Length;
            cLine = 1;
            FileName = null;
        }

        public override int Read()
        {
            if (cPos < curLineEnd)
                return curLine[cPos++ - curLineStart];
            if (cPos++ == curLineEnd)
                return '\n';
            if (cLine >= numLines)
                return EndOfFile;

            // Step on to the next line of the list; cPos has already
            // been advanced past its first character by the test above.
            curLine = line[cLine];
            curLen = curLine.Length;
            curLineStart = curLineEnd + 1;
            curLineEnd = curLineStart + curLen;
            if (curLineEnd > maxPos)
                maxPos = curLineEnd;
            cLine++;
            return curLen > 0 ? curLine[0] : '\n';
        }

        // To speed up searches for the line containing a position
        private int cachedPosition;
        private int cachedIndex;
        private int cachedLineStart;

        // Given a position pos within the entire source, the results are
        //   ix     -- the index of the containing line
        //   lstart -- the position of the first character on that line
        private void findIndex(int pos, out int ix, out int lstart)
        {
            if (pos >= cachedPosition)
            {
                // Resume the scan from the line found by the previous call.
                ix = cachedIndex;
                lstart = cachedLineStart;
            }
            else
            {
                ix = lstart = 0;
            }
            while (ix < numLines)
            {
                int len = line[ix].Length + 1;    // +1 for the implied '\n'
                if (pos < lstart + len) break;
                lstart += len;
                ix++;
            }
            cachedPosition = pos;
            cachedIndex = ix;
            cachedLineStart = lstart;
        }

        public override string GetString(int begin, int limit)
        {
            if (begin >= maxPos || limit <= begin) return "";
            int endIx, begIx, endLineStart, begLineStart;
            findIndex(begin, out begIx, out begLineStart);
            int begCol = begin - begLineStart;
            findIndex(limit, out endIx, out endLineStart);
            int endCol = limit - endLineStart;
            string s = line[begIx];
            if (begIx == endIx)
            {
                // the usual case, substring all on one line
                return (endCol <= s.Length) ?
                    s.Substring(begCol, endCol - begCol)
                    : s.Substring(begCol) + "\n";
            }
            // the string spans multiple lines, yuk!
            StringBuilder sb = new StringBuilder();
            if (begCol < s.Length)
                sb.Append(s.Substring(begCol));
            for (; ; )
            {
                sb.Append("\n");
                s = line[++begIx];
                if (begIx >= endIx) break;
                sb.Append(s);
            }
            if (endCol <= s.Length)
            {
                sb.Append(s.Substring(0, endCol));
            }
            else
            {
                sb.Append(s);
                sb.Append("\n");
            }
            return sb.ToString();
        }

        public override int Pos
        {
            get { return cPos; }
            set
            {
                cPos = value;
                findIndex(cPos, out cLine, out curLineStart);
                // cLine should be the *next* line after curLine.
                curLine = (cLine < numLines ? line[cLine++] : "");
                curLineEnd = curLineStart + curLine.Length;
            }
        }

        public override string ToString() { return "LineBuffer"; }
    }

#if (!NOFILES)
    // ==============================================================
    // =====     class BuildBuff : for unicode text files    ========
    // ==============================================================

    /// <summary>
    /// Buffer that pulls blocks of characters from a stream on demand,
    /// through a <see cref="BlockReader"/> delegate, and keeps them in a
    /// pair of string builders.
    /// </summary>
    class BuildBuffer : ScanBuff
    {
        // Double buffer for char stream.
        class BufferElement
        {
            StringBuilder bldr = new StringBuilder();
            StringBuilder next = new StringBuilder();
            int minIx;             // position of the first char held in bldr
            int maxIx;             // position one past the last char held
            int brkIx;             // position of the first char held in next
            bool appendToNext;

            internal BufferElement() { }

            internal int MaxIndex { get { return maxIx; } }
            // internal int MinIndex { get { return minIx; } }

            internal char this[int index]
            {
                get
                {
                    if (index < minIx || index >= maxIx)
                        throw new BufferException("Index was outside data buffer");
                    else if (index < brkIx)
                        return bldr[index - minIx];
                    else
                        return next[index - brkIx];
                }
            }

            internal void Append(char[] block, int count)
            {
                maxIx += count;
                if (appendToNext)
                    this.next.Append(block, 0, count);
                else
                {
                    // Only the very first block goes into bldr.
                    this.bldr.Append(block, 0, count);
                    brkIx = maxIx;
                    appendToNext = true;
                }
            }

            internal string GetString(int start, int limit)
            {
                if (limit <= start)
                    return "";
                if (start >= minIx && limit <= maxIx)
                {
                    if (limit < brkIx) // String entirely in bldr builder
                        return bldr.ToString(start - minIx, limit - start);
                    else if (start >= brkIx) // String entirely in next builder
                        return next.ToString(start - brkIx, limit - start);
                    else // Must do a string-concatenation
                        return
                            bldr.ToString(start - minIx, brkIx - start) +
                            next.ToString(0, limit - brkIx);
                }
                else
                    throw new BufferException("String was outside data buffer");
            }

            internal void Mark(int limit)
            {
                if (limit > brkIx + 16) // Rotate blocks
                {
                    StringBuilder temp = bldr;
                    bldr = next;
                    next = temp;
                    next.Length = 0;
                    minIx = brkIx;
                    brkIx = maxIx;
                }
            }
        }

        BufferElement data = new BufferElement();

        int bPos;            // Position index in the StringBuilder
        BlockReader NextBlk; // Delegate that serves char-arrays;

        private string EncodingName
        {
            get
            {
                StreamReader rdr = NextBlk.Target as StreamReader;
                return (rdr == null ? "raw-bytes" : rdr.CurrentEncoding.BodyName);
            }
        }

        public BuildBuffer(Stream stream)
        {
            FileStream fStrm = (stream as FileStream);
            if (fStrm != null) FileName = fStrm.Name;
            NextBlk = BlockReaderFactory.Raw(stream);
        }

#if (!BYTEMODE)
        public BuildBuffer(Stream stream, int fallbackCodePage)
        {
            FileStream fStrm = (stream as FileStream);
            if (fStrm != null) FileName = fStrm.Name;
            NextBlk = BlockReaderFactory.Get(stream, fallbackCodePage);
        }
#endif

        /// <summary>
        /// Marks a conservative lower bound for the buffer,
        /// allowing space to be reclaimed.  If an application
        /// needs to call GetString at arbitrary past locations
        /// in the input stream, Mark() is not called.
        /// </summary>
        public override void Mark() { data.Mark(bPos - 2); }

        public override int Pos
        {
            get { return bPos; }
            set { bPos = value; }
        }

        /// <summary>
        /// Read returns the ordinal number of the next char, or
        /// EOF (-1) for an end of stream.  Note that the next
        /// code point may require *two* calls of Read().
        /// </summary>
        /// <returns>The next char code, or <see cref="ScanBuff.EndOfFile"/>.</returns>
        public override int Read()
        {
            //
            //  Characters at positions
            //  [data.offset, data.offset + data.bldr.Length)
            //  are available in data.bldr.
            //
            if (bPos < data.MaxIndex)
            {
                // ch0 cannot be EOF
                return (int)data[bPos++];
            }
            else // Read from underlying stream
            {
                // Experimental code, blocks of page size
                char[] chrs = new char[4096];
                int count = NextBlk(chrs, 0, 4096);
                if (count == 0)
                    return EndOfFile;
                else
                {
                    data.Append(chrs, count);
                    return (int)data[bPos++];
                }
            }
        }

        public override string GetString(int begin, int limit)
        {
            return data.GetString(begin, limit);
        }

        public override string ToString()
        {
            return "StringBuilder buffer, encoding: " + this.EncodingName;
        }
    }

    // =============== End ScanBuff-derived classes ==================

    /// <summary>
    /// Reads up to <paramref name="number"/> characters into
    /// <paramref name="block"/> starting at <paramref name="index"/>,
    /// returning the count actually read (zero at end of input).
    /// </summary>
    public delegate int BlockReader(char[] block, int index, int number);

    // A delegate factory, serving up a delegate that
    // reads a block of characters from the underlying
    // encoded stream, via a StreamReader object.
    //
    public static class BlockReaderFactory
    {
        /// <summary>
        /// Returns a reader that widens each byte of the stream to a
        /// char without any decoding.
        /// </summary>
        public static BlockReader Raw(Stream stream)
        {
            return delegate(char[] block, int index, int number)
            {
                byte[] b = new byte[number];
                int count = stream.Read(b, 0, number);
                int i = 0;
                int j = index;
                for (; i < count; i++, j++)
                    block[j] = (char)b[i];
                return count;
            };
        }

#if (!BYTEMODE)
        /// <summary>
        /// Returns a reader for the stream. A byte-order mark, if present,
        /// selects the encoding; otherwise <paramref name="fallbackCodePage"/>
        /// is used, where -1 means raw bytes and -2 means "guess".
        /// </summary>
        public static BlockReader Get(Stream stream, int fallbackCodePage)
        {
            Encoding encoding;
            int preamble = Preamble(stream);

            if (preamble != 0)  // There is a valid BOM here!
                encoding = Encoding.GetEncoding(preamble);
            else if (fallbackCodePage == -1) // Fallback is "raw" bytes
                return Raw(stream);
            else if (fallbackCodePage != -2) // Anything but "guess"
                encoding = Encoding.GetEncoding(fallbackCodePage);
            else // This is the "guess" option
            {
                int guess = new Guesser(stream).GuessCodePage();
                stream.Seek(0, SeekOrigin.Begin);
                if (guess == -1) // ==> this is a 7-bit file
                    encoding = Encoding.ASCII;
                else if (guess == 65001)
                    encoding = Encoding.UTF8;
                else             // ==> use the machine default
                    encoding = Encoding.Default;
            }
            StreamReader reader = new StreamReader(stream, encoding);
            return reader.Read;
        }

        // Returns the code page denoted by the stream's byte-order mark,
        // leaving the stream positioned just after the mark; or rewinds
        // the stream and returns 0 when no mark is recognized.
        static int Preamble(Stream stream)
        {
            int b0 = stream.ReadByte();
            int b1 = stream.ReadByte();

            if (b0 == 0xfe && b1 == 0xff)
                return 1201; // UTF16BE
            if (b0 == 0xff && b1 == 0xfe)
                return 1200; // UTF16LE

            int b2 = stream.ReadByte();
            if (b0 == 0xef && b1 == 0xbb && b2 == 0xbf)
                return 65001; // UTF8
            //
            // There is no unicode preamble, so we
            // return denoter for the machine default.
            //
            stream.Seek(0, SeekOrigin.Begin);
            return 0;
        }
#endif // !BYTEMODE
    }
#endif // !NOFILES
    #endregion Buffer classes

    // ==============================================================
    // ============      class CodePageHandling         =============
    // ==============================================================
#if (!NOFILES)
    public static class CodePageHandling
    {
        /// <summary>
        /// Translates a code page option into a code page number.
        /// An optional "CodePage:" prefix is ignored, as is letter case.
        /// </summary>
        /// <param name="option">
        /// "raw", "guess", "default", a decimal code page number,
        /// or an encoding name.
        /// </param>
        /// <returns>
        /// -1 for "raw", -2 for "guess", 0 for "default", otherwise the
        /// code page number. Any option that cannot be interpreted,
        /// including a null or empty one, is reported on the standard
        /// error stream and yields 0.
        /// </returns>
        public static int GetCodePage(string option)
        {
            string command = (option ?? "").ToUpperInvariant();
            if (command.StartsWith("CodePage:", StringComparison.OrdinalIgnoreCase))
                command = command.Substring(9);

            // An empty name has no first character to classify, so it
            // must be rejected before command[0] is inspected below.
            if (command.Length == 0)
            {
                Console.Error.WriteLine(
                    "Invalid format \"{0}\", using machine default", option);
                return 0;
            }

            try
            {
                if (command.Equals("RAW"))
                    return -1;
                else if (command.Equals("GUESS"))
                    return -2;
                else if (command.Equals("DEFAULT"))
                    return 0;
                else if (char.IsDigit(command[0]))
                    return int.Parse(command, CultureInfo.InvariantCulture);
                else
                {
                    Encoding enc = Encoding.GetEncoding(command);
                    return enc.CodePage;
                }
            }
            catch (FormatException)
            {
                Console.Error.WriteLine(
                    "Invalid format \"{0}\", using machine default", option);
            }
            catch (OverflowException)
            {
                // A digit string too large for an int is just as unusable.
                Console.Error.WriteLine(
                    "Invalid format \"{0}\", using machine default", option);
            }
            catch (ArgumentException)
            {
                Console.Error.WriteLine(
                    "Unknown code page \"{0}\", using machine default", option);
            }
            return 0;
        }
    }

    #region guesser
#if (!BYTEMODE)
    // ==============================================================
    // ============          Encoding Guesser           =============
    // ==============================================================

    /// <summary>
    /// This class provides a simple finite state automaton that
    /// scans the file looking for (1) valid UTF-8 byte patterns,
    /// (2) bytes >= 0x80 which are not part of a UTF-8 sequence.
    /// The method then guesses whether it is UTF-8 or maybe some
    /// local machine default encoding.  This works well for the
    /// various Latin encodings.
    /// </summary>
    internal class Guesser
    {
        ScanBuff buffer;

        /// <summary>
        /// Scans the whole input and returns -1 for a 7-bit file,
        /// 65001 for UTF-8, or 0 for the machine default code page.
        /// </summary>
        public int GuessCodePage() { return Scan(); }

        const int maxAccept = 10;
        const int initial = 0;
        const int eofNum = 0;
        const int goStart = -1;
        const int INITIAL = 0;
        const int EndToken = 0;

        #region user code
        /*
         *  Reads the bytes of a file to determine if it is
         *  UTF-8 or a single-byte code page file.
         */
        public long utfX;   // incremented by the actions for complete UTF-8 sequences
        public long uppr;   // incremented by the actions for other bytes >= 0x80
        #endregion user code

        int state;
        int currentStart = startState[0];
        int code;

        #region ScannerTables
        static int[] startState = new int[] { 11, 0 };

        #region CharacterMap
        static sbyte[] map = new sbyte[256] {
/*     '\0' */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*   '\x10' */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*   '\x20' */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*      '0' */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*      '@' */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*      'P' */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*      '`' */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*      'p' */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*   '\x80' */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/*   '\x90' */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/*   '\xA0' */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/*   '\xB0' */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/*   '\xC0' */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/*   '\xD0' */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/*   '\xE0' */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
/*   '\xF0' */ 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5 };
        #endregion

        static sbyte[][] nextState = new sbyte[][] {
            new sbyte[] {0, 0, 0, 0, 0, 0},
            new sbyte[] {-1, -1, 10, -1, -1, -1},
            new sbyte[] {-1, -1, -1, -1, -1, -1},
            new sbyte[] {-1, -1, 8, -1, -1, -1},
            new sbyte[] {-1, -1, 5, -1, -1, -1},
            new sbyte[] {-1, -1, 6, -1, -1, -1},
            new sbyte[] {-1, -1, 7, -1, -1, -1},
            null,
            new sbyte[] {-1, -1, 9, -1, -1, -1},
            null,
            null,
            new sbyte[] {-1, 1, 2, 3, 4, 2}
        };

        [SuppressMessage("Microsoft.Performance", "CA1810:InitializeReferenceTypeStaticFieldsInline")]
        // Reason for suppression: cannot have self-reference in array initializer.
        static Guesser()
        {
            // Rows 7, 9 and 10 share the transitions of row 2.
            nextState[7] = nextState[2];
            nextState[9] = nextState[2];
            nextState[10] = nextState[2];
        }

        int NextState()
        {
            if (code == ScanBuff.EndOfFile)
                return eofNum;
            else
                return nextState[state][map[code]];
        }
        #endregion

        public Guesser(System.IO.Stream file) { SetSource(file); }

        public void SetSource(System.IO.Stream source)
        {
            this.buffer = new BuildBuffer(source);
            code = buffer.Read();
        }

        int Scan()
        {
            for (; ; )
            {
                int next;
                state = currentStart;
                // Skip input for which the start state has no transition.
                while ((next = NextState()) == goStart)
                    code = buffer.Read();

                state = next;
                code = buffer.Read();

                // Follow transitions for as long as the automaton can advance.
                while ((next = NextState()) > eofNum)
                {
                    state = next;
                    code = buffer.Read();
                }
                if (state <= maxAccept)
                {
                    #region ActionSwitch
#pragma warning disable 162
                    switch (state)
                    {
                        case eofNum:
                            switch (currentStart)
                            {
                                case 11:
                                    if (utfX == 0 && uppr == 0) return -1; /* raw ascii */
                                    else if (uppr * 10 > utfX) return 0;   /* default code page */
                                    else return 65001;                     /* UTF-8 encoding */
                                    break;
                            }
                            return EndToken;
                        case 1: // Recognized '{Upper128}',	Shortest string "\xC0"
                        case 2: // Recognized '{Upper128}',	Shortest string "\x80"
                        case 3: // Recognized '{Upper128}',	Shortest string "\xE0"
                        case 4: // Recognized '{Upper128}',	Shortest string "\xF0"
                            uppr++;
                            break;
                        case 5: // Recognized '{Utf8pfx4}{Utf8cont}',	Shortest string "\xF0\x80"
                            uppr += 2;
                            break;
                        case 6: // Recognized '{Utf8pfx4}{Utf8cont}{2}',	Shortest string "\xF0\x80\x80"
                            uppr += 3;
                            break;
                        case 7: // Recognized '{Utf8pfx4}{Utf8cont}{3}',	Shortest string "\xF0\x80\x80\x80"
                            utfX += 3;
                            break;
                        case 8: // Recognized '{Utf8pfx3}{Utf8cont}',	Shortest string "\xE0\x80"
                            uppr += 2;
                            break;
                        case 9: // Recognized '{Utf8pfx3}{Utf8cont}{2}',	Shortest string "\xE0\x80\x80"
                            utfX += 2;
                            break;
                        case 10: // Recognized '{Utf8pfx2}{Utf8cont}',	Shortest string "\xC0\x80"
                            utfX++;
                            break;
                        default:
                            break;
                    }
#pragma warning restore 162
                    #endregion
                }
            }
        }
    } // end class Guesser

#endif // !BYTEMODE
    #endregion
#endif // !NOFILES

    // End of code copied from embedded resource
}