#region Header /** * Lexer.cs * JSON lexer implementation based on a finite state machine. * * The authors disclaim copyright to this source code. For more details, see * the COPYING file included with this distribution. **/ #endregion using System; using System.Collections.Generic; using System.IO; using System.Text; namespace LitJson { internal class FsmContext { public bool Return; public int NextState; public Lexer L; public int StateStack; } internal class Lexer { #region Fields private delegate bool StateHandler (FsmContext ctx); private static int[] fsm_return_table; private static StateHandler[] fsm_handler_table; private bool allow_comments; private bool allow_single_quoted_strings; private bool end_of_input; private FsmContext fsm_context; private int input_buffer; private int input_char; private TextReader reader; private int state; private StringBuilder string_buffer; private string string_value; private int token; private int unichar; #endregion #region Properties public bool AllowComments { get { return allow_comments; } set { allow_comments = value; } } public bool AllowSingleQuotedStrings { get { return allow_single_quoted_strings; } set { allow_single_quoted_strings = value; } } public bool EndOfInput { get { return end_of_input; } } public int Token { get { return token; } } public string StringValue { get { return string_value; } } #endregion #region Constructors static Lexer () { PopulateFsmTables (); } public Lexer (TextReader reader) { allow_comments = true; allow_single_quoted_strings = true; input_buffer = 0; string_buffer = new StringBuilder (128); state = 1; end_of_input = false; this.reader = reader; fsm_context = new FsmContext (); fsm_context.L = this; } #endregion #region Static Methods private static int HexValue (int digit) { switch (digit) { case 'a': case 'A': return 10; case 'b': case 'B': return 11; case 'c': case 'C': return 12; case 'd': case 'D': return 13; case 'e': case 'E': return 14; case 'f': case 'F': return 15; default: return digit - '0'; } } private static void PopulateFsmTables () { // See section A.1. of the manual for details of the finite // state machine. fsm_handler_table = new StateHandler[28] { State1, State2, State3, State4, State5, State6, State7, State8, State9, State10, State11, State12, State13, State14, State15, State16, State17, State18, State19, State20, State21, State22, State23, State24, State25, State26, State27, State28 }; fsm_return_table = new int[28] { (int) ParserToken.Char, 0, (int) ParserToken.Number, (int) ParserToken.Number, 0, (int) ParserToken.Number, 0, (int) ParserToken.Number, 0, 0, (int) ParserToken.True, 0, 0, 0, (int) ParserToken.False, 0, 0, (int) ParserToken.Null, (int) ParserToken.CharSeq, (int) ParserToken.Char, 0, 0, (int) ParserToken.CharSeq, (int) ParserToken.Char, 0, 0, 0, 0 }; } private static char ProcessEscChar (int esc_char) { switch (esc_char) { case '"': case '\'': case '\\': case '/': return Convert.ToChar (esc_char); case 'n': return '\n'; case 't': return '\t'; case 'r': return '\r'; case 'b': return '\b'; case 'f': return '\f'; default: // Unreachable return '?'; } } private static bool State1 (FsmContext ctx) { while (ctx.L.GetChar ()) { if (ctx.L.input_char == ' ' || ctx.L.input_char >= '\t' && ctx.L.input_char <= '\r') continue; if (ctx.L.input_char >= '1' && ctx.L.input_char <= '9') { ctx.L.string_buffer.Append ((char) ctx.L.input_char); ctx.NextState = 3; return true; } switch (ctx.L.input_char) { case '"': ctx.NextState = 19; ctx.Return = true; return true; case ',': case ':': case '[': case ']': case '{': case '}': ctx.NextState = 1; ctx.Return = true; return true; case '-': ctx.L.string_buffer.Append ((char) ctx.L.input_char); ctx.NextState = 2; return true; case '0': ctx.L.string_buffer.Append ((char) ctx.L.input_char); ctx.NextState = 4; return true; case 'f': ctx.NextState = 12; return true; case 'n': ctx.NextState = 16; return true; case 't': ctx.NextState = 9; return true; case '\'': if (! ctx.L.allow_single_quoted_strings) return false; ctx.L.input_char = '"'; ctx.NextState = 23; ctx.Return = true; return true; case '/': if (! ctx.L.allow_comments) return false; ctx.NextState = 25; return true; default: return false; } } return true; } private static bool State2 (FsmContext ctx) { ctx.L.GetChar (); if (ctx.L.input_char >= '1' && ctx.L.input_char<= '9') { ctx.L.string_buffer.Append ((char) ctx.L.input_char); ctx.NextState = 3; return true; } switch (ctx.L.input_char) { case '0': ctx.L.string_buffer.Append ((char) ctx.L.input_char); ctx.NextState = 4; return true; default: return false; } } private static bool State3 (FsmContext ctx) { while (ctx.L.GetChar ()) { if (ctx.L.input_char >= '0' && ctx.L.input_char <= '9') { ctx.L.string_buffer.Append ((char) ctx.L.input_char); continue; } if (ctx.L.input_char == ' ' || ctx.L.input_char >= '\t' && ctx.L.input_char <= '\r') { ctx.Return = true; ctx.NextState = 1; return true; } switch (ctx.L.input_char) { case ',': case ']': case '}': ctx.L.UngetChar (); ctx.Return = true; ctx.NextState = 1; return true; case '.': ctx.L.string_buffer.Append ((char) ctx.L.input_char); ctx.NextState = 5; return true; case 'e': case 'E': ctx.L.string_buffer.Append ((char) ctx.L.input_char); ctx.NextState = 7; return true; default: return false; } } return true; } private static bool State4 (FsmContext ctx) { ctx.L.GetChar (); if (ctx.L.input_char == ' ' || ctx.L.input_char >= '\t' && ctx.L.input_char <= '\r') { ctx.Return = true; ctx.NextState = 1; return true; } switch (ctx.L.input_char) { case ',': case ']': case '}': ctx.L.UngetChar (); ctx.Return = true; ctx.NextState = 1; return true; case '.': ctx.L.string_buffer.Append ((char) ctx.L.input_char); ctx.NextState = 5; return true; case 'e': case 'E': ctx.L.string_buffer.Append ((char) ctx.L.input_char); ctx.NextState = 7; return true; default: return false; } } private static bool State5 (FsmContext ctx) { ctx.L.GetChar (); if (ctx.L.input_char >= '0' && ctx.L.input_char <= '9') { ctx.L.string_buffer.Append ((char) ctx.L.input_char); ctx.NextState = 6; return true; } return false; } private static bool State6 (FsmContext ctx) { while (ctx.L.GetChar ()) { if (ctx.L.input_char >= '0' && ctx.L.input_char <= '9') { ctx.L.string_buffer.Append ((char) ctx.L.input_char); continue; } if (ctx.L.input_char == ' ' || ctx.L.input_char >= '\t' && ctx.L.input_char <= '\r') { ctx.Return = true; ctx.NextState = 1; return true; } switch (ctx.L.input_char) { case ',': case ']': case '}': ctx.L.UngetChar (); ctx.Return = true; ctx.NextState = 1; return true; case 'e': case 'E': ctx.L.string_buffer.Append ((char) ctx.L.input_char); ctx.NextState = 7; return true; default: return false; } } return true; } private static bool State7 (FsmContext ctx) { ctx.L.GetChar (); if (ctx.L.input_char >= '0' && ctx.L.input_char<= '9') { ctx.L.string_buffer.Append ((char) ctx.L.input_char); ctx.NextState = 8; return true; } switch (ctx.L.input_char) { case '+': case '-': ctx.L.string_buffer.Append ((char) ctx.L.input_char); ctx.NextState = 8; return true; default: return false; } } private static bool State8 (FsmContext ctx) { while (ctx.L.GetChar ()) { if (ctx.L.input_char >= '0' && ctx.L.input_char<= '9') { ctx.L.string_buffer.Append ((char) ctx.L.input_char); continue; } if (ctx.L.input_char == ' ' || ctx.L.input_char >= '\t' && ctx.L.input_char<= '\r') { ctx.Return = true; ctx.NextState = 1; return true; } switch (ctx.L.input_char) { case ',': case ']': case '}': ctx.L.UngetChar (); ctx.Return = true; ctx.NextState = 1; return true; default: return false; } } return true; } private static bool State9 (FsmContext ctx) { ctx.L.GetChar (); switch (ctx.L.input_char) { case 'r': ctx.NextState = 10; return true; default: return false; } } private static bool State10 (FsmContext ctx) { ctx.L.GetChar (); switch (ctx.L.input_char) { case 'u': ctx.NextState = 11; return true; default: return false; } } private static bool State11 (FsmContext ctx) { ctx.L.GetChar (); switch (ctx.L.input_char) { case 'e': ctx.Return = true; ctx.NextState = 1; return true; default: return false; } } private static bool State12 (FsmContext ctx) { ctx.L.GetChar (); switch (ctx.L.input_char) { case 'a': ctx.NextState = 13; return true; default: return false; } } private static bool State13 (FsmContext ctx) { ctx.L.GetChar (); switch (ctx.L.input_char) { case 'l': ctx.NextState = 14; return true; default: return false; } } private static bool State14 (FsmContext ctx) { ctx.L.GetChar (); switch (ctx.L.input_char) { case 's': ctx.NextState = 15; return true; default: return false; } } private static bool State15 (FsmContext ctx) { ctx.L.GetChar (); switch (ctx.L.input_char) { case 'e': ctx.Return = true; ctx.NextState = 1; return true; default: return false; } } private static bool State16 (FsmContext ctx) { ctx.L.GetChar (); switch (ctx.L.input_char) { case 'u': ctx.NextState = 17; return true; default: return false; } } private static bool State17 (FsmContext ctx) { ctx.L.GetChar (); switch (ctx.L.input_char) { case 'l': ctx.NextState = 18; return true; default: return false; } } private static bool State18 (FsmContext ctx) { ctx.L.GetChar (); switch (ctx.L.input_char) { case 'l': ctx.Return = true; ctx.NextState = 1; return true; default: return false; } } private static bool State19 (FsmContext ctx) { while (ctx.L.GetChar ()) { switch (ctx.L.input_char) { case '"': ctx.L.UngetChar (); ctx.Return = true; ctx.NextState = 20; return true; case '\\': ctx.StateStack = 19; ctx.NextState = 21; return true; default: ctx.L.string_buffer.Append ((char) ctx.L.input_char); continue; } } return true; } private static bool State20 (FsmContext ctx) { ctx.L.GetChar (); switch (ctx.L.input_char) { case '"': ctx.Return = true; ctx.NextState = 1; return true; default: return false; } } private static bool State21 (FsmContext ctx) { ctx.L.GetChar (); switch (ctx.L.input_char) { case 'u': ctx.NextState = 22; return true; case '"': case '\'': case '/': case '\\': case 'b': case 'f': case 'n': case 'r': case 't': ctx.L.string_buffer.Append ( ProcessEscChar (ctx.L.input_char)); ctx.NextState = ctx.StateStack; return true; default: return false; } } private static bool State22 (FsmContext ctx) { int counter = 0; int mult = 4096; ctx.L.unichar = 0; while (ctx.L.GetChar ()) { if (ctx.L.input_char >= '0' && ctx.L.input_char <= '9' || ctx.L.input_char >= 'A' && ctx.L.input_char <= 'F' || ctx.L.input_char >= 'a' && ctx.L.input_char <= 'f') { ctx.L.unichar += HexValue (ctx.L.input_char) * mult; counter++; mult /= 16; if (counter == 4) { ctx.L.string_buffer.Append ( Convert.ToChar (ctx.L.unichar)); ctx.NextState = ctx.StateStack; return true; } continue; } return false; } return true; } private static bool State23 (FsmContext ctx) { while (ctx.L.GetChar ()) { switch (ctx.L.input_char) { case '\'': ctx.L.UngetChar (); ctx.Return = true; ctx.NextState = 24; return true; case '\\': ctx.StateStack = 23; ctx.NextState = 21; return true; default: ctx.L.string_buffer.Append ((char) ctx.L.input_char); continue; } } return true; } private static bool State24 (FsmContext ctx) { ctx.L.GetChar (); switch (ctx.L.input_char) { case '\'': ctx.L.input_char = '"'; ctx.Return = true; ctx.NextState = 1; return true; default: return false; } } private static bool State25 (FsmContext ctx) { ctx.L.GetChar (); switch (ctx.L.input_char) { case '*': ctx.NextState = 27; return true; case '/': ctx.NextState = 26; return true; default: return false; } } private static bool State26 (FsmContext ctx) { while (ctx.L.GetChar ()) { if (ctx.L.input_char == '\n') { ctx.NextState = 1; return true; } } return true; } private static bool State27 (FsmContext ctx) { while (ctx.L.GetChar ()) { if (ctx.L.input_char == '*') { ctx.NextState = 28; return true; } } return true; } private static bool State28 (FsmContext ctx) { while (ctx.L.GetChar ()) { if (ctx.L.input_char == '*') continue; if (ctx.L.input_char == '/') { ctx.NextState = 1; return true; } ctx.NextState = 27; return true; } return true; } #endregion private bool GetChar () { if ((input_char = NextChar ()) != -1) return true; end_of_input = true; return false; } private int NextChar () { if (input_buffer != 0) { int tmp = input_buffer; input_buffer = 0; return tmp; } return reader.Read (); } public bool NextToken () { StateHandler handler; fsm_context.Return = false; while (true) { handler = fsm_handler_table[state - 1]; if (! handler (fsm_context)) throw new JsonException (input_char); if (end_of_input) return false; if (fsm_context.Return) { string_value = string_buffer.ToString (); string_buffer.Remove (0, string_buffer.Length); token = fsm_return_table[state - 1]; if (token == (int) ParserToken.Char) token = input_char; state = fsm_context.NextState; return true; } state = fsm_context.NextState; } } private void UngetChar () { input_buffer = input_char; } } }