C#中实现JSON解析器
JSON(JavaScript Object Notation)即 JavaScript 对象表示法,是一种轻量级的数据交换格式。
起源与发展
JSON 源于 JavaScript 编程语言,是 JavaScript 对象字面量语法的一个子集。但如今它已经独立于 JavaScript,成为一种通用的数据格式,广泛应用于各种编程语言和系统之间的数据交换。由于其简洁性和通用性,在 Web 开发、移动应用开发、云计算等领域得到了极为广泛的应用。
特点
轻量级:相较于 XML 等其他数据交换格式,JSON 的数据结构更加简洁,文件体积更小,这使得在网络传输过程中能够减少带宽占用,提高传输效率。
易读性:JSON 的文本格式清晰、简洁,符合人类的阅读习惯,开发人员可以很容易地理解和解析其中的数据内容。
跨语言兼容性:几乎所有的现代编程语言都提供了对 JSON 的支持,能够方便地将 JSON 数据解析为本地数据结构,或者将本地数据结构转换为 JSON 格式,这使得不同编程语言开发的系统之间能够轻松进行数据交换。
实现思路:
词法分析
词法分析的目的是将输入的 JSON 字符串分解为一个个的词法单元,例如花括号、方括号、逗号、冒号、字符串、数字、布尔值和空值等。我们可以定义一个 Token 类来表示词法单元,以及一个 Scanner 类(即词法分析器)来进行词法分析。
语法分析
语法分析的目的是根据词法单元构建 JSON 对象或数组。我们可以定义一个 Parser 类来进行语法分析,它接收 Scanner 生成的 Token 列表作为输入,并返回解析后的 JSON 值(对象、数组或基本类型的值)。
代码实现:
1.定义Token类:
/// <summary>
/// Kinds of lexical units produced while scanning JSON text.
/// </summary>
public enum TokenType
{
    // Literal values.
    STRING,
    NUMBER,
    BOOLEAN,
    NULL,

    // Structural punctuation.
    LEFT_BRACE,
    RIGHT_BRACE,
    LEFT_BRACKET,
    RIGHT_BRACKET,
    COMMA,
    COLON,

    // Marks the end of the token stream.
    EOF
}

/// <summary>
/// An immutable pairing of a token kind with the value attached to it.
/// </summary>
public class Token
{
    private readonly TokenType type;
    private readonly object value;

    /// <summary>
    /// Creates a token.
    /// </summary>
    /// <param name="type">The kind of token.</param>
    /// <param name="value">The value carried by the token; may be null.</param>
    public Token(TokenType type, object value)
    {
        this.type = type;
        this.value = value;
    }

    /// <summary>The kind of this token.</summary>
    public TokenType Type => type;

    /// <summary>The value supplied at construction time (possibly null).</summary>
    public object Value => value;
}
2.实现扫描类:
/// <summary>
/// Lexer that turns JSON text into a flat list of <see cref="Token"/> objects.
/// </summary>
/// <remarks>
/// Value stored on each token:
/// punctuation tokens carry the character as a one-character string;
/// STRING tokens carry the decoded text (escape sequences resolved);
/// NUMBER tokens carry an int, a long (integer outside the int range) or a double;
/// BOOLEAN tokens carry a bool; NULL and EOF tokens carry null.
/// </remarks>
public class Scanner
{
    // Number parsing must not depend on the current culture: JSON always uses '.'.
    private const System.Globalization.NumberStyles IntegerStyle =
        System.Globalization.NumberStyles.AllowLeadingSign;
    private const System.Globalization.NumberStyles FloatStyle =
        System.Globalization.NumberStyles.Float;

    private readonly string source;
    private int start;      // index of the first character of the token being scanned
    private int current;    // index of the next character to read
    private int line = 1;   // 1-based line number, used only in error messages
    private readonly List<Token> tokens = new List<Token>();

    /// <summary>Creates a scanner over <paramref name="source"/>.</summary>
    /// <param name="source">The JSON text to tokenize.</param>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    public Scanner(string source)
    {
        this.source = source ?? throw new ArgumentNullException(nameof(source));
    }

    /// <summary>
    /// Scans the remaining input and appends an EOF token.
    /// </summary>
    /// <returns>The scanner's token list, terminated by an EOF token.</returns>
    /// <exception cref="ArgumentException">The input contains a malformed token.</exception>
    public List<Token> Scan()
    {
        while (!IsAtEnd())
        {
            start = current;
            ScanToken();
        }

        tokens.Add(new Token(TokenType.EOF, null));
        return tokens;
    }

    private bool IsAtEnd() => current >= source.Length;

    // Reads one token (or skips one whitespace character) starting at 'current'.
    private void ScanToken()
    {
        char c = Advance();
        switch (c)
        {
            case '{':
                AddToken(TokenType.LEFT_BRACE, c);
                break;
            case '}':
                AddToken(TokenType.RIGHT_BRACE, c);
                break;
            case '[':
                AddToken(TokenType.LEFT_BRACKET, c);
                break;
            case ']':
                AddToken(TokenType.RIGHT_BRACKET, c);
                break;
            case ',':
                AddToken(TokenType.COMMA, c);
                break;
            case ':':
                AddToken(TokenType.COLON, c);
                break;
            case '\n':
                line++;
                break;
            // JSON insignificant whitespace also includes tab and carriage return
            // (e.g. pretty-printed input or Windows line endings).
            case ' ':
            case '\r':
            case '\t':
                break;
            case '"':
                AddString();
                break;
            case '-':
                if (IsDigit(Peek()))
                {
                    Advance();
                    AddNumber();
                }
                else
                {
                    throw new ArgumentException($"'-' must be followed by a digit at line {line}");
                }
                break;
            default:
                if (IsDigit(c))
                {
                    AddNumber();
                }
                else if (char.IsLetter(c))
                {
                    AddKeyword();
                }
                else
                {
                    throw new ArgumentException($"Unexpected character '{c}' at line {line}");
                }
                break;
        }
    }

    private void AddToken(TokenType type, char character)
    {
        tokens.Add(new Token(type, character.ToString()));
    }

    private char Advance() => source[current++];

    // Only ASCII digits are valid in JSON numbers; char.IsDigit would also accept
    // other Unicode decimal digits that the numeric parsers reject afterwards.
    private static bool IsDigit(char c) => c >= '0' && c <= '9';

    // Scans a string literal; the opening quote has already been consumed.
    private void AddString()
    {
        var value = new System.Text.StringBuilder();

        while (!IsAtEnd() && Peek() != '"')
        {
            char c = Advance();
            if (c == '\\')
            {
                // Decode the escape so that \" does not terminate the string.
                value.Append(ReadEscape());
                continue;
            }

            if (c == '\n')
            {
                line++;
            }
            value.Append(c);
        }

        if (IsAtEnd())
        {
            throw new ArgumentException($"Unterminated string at line {line}");
        }

        Advance(); // Consume closing '"'
        tokens.Add(new Token(TokenType.STRING, value.ToString()));
    }

    // Decodes the escape sequence that follows an already-consumed backslash.
    private char ReadEscape()
    {
        if (IsAtEnd())
        {
            throw new ArgumentException($"Unterminated string at line {line}");
        }

        char escape = Advance();
        switch (escape)
        {
            case '"': return '"';
            case '\\': return '\\';
            case '/': return '/';
            case 'b': return '\b';
            case 'f': return '\f';
            case 'n': return '\n';
            case 'r': return '\r';
            case 't': return '\t';
            case 'u': return ReadUnicodeEscape();
            default:
                throw new ArgumentException($"Invalid escape sequence '\\{escape}' at line {line}");
        }
    }

    // Decodes the four hex digits of a \uXXXX escape into one UTF-16 code unit.
    // Surrogate pairs arrive as two consecutive escapes and are appended in order.
    private char ReadUnicodeEscape()
    {
        if (current + 4 > source.Length)
        {
            throw new ArgumentException($"Unterminated string at line {line}");
        }

        string hex = source.Substring(current, 4);
        if (!ushort.TryParse(
                hex,
                System.Globalization.NumberStyles.AllowHexSpecifier,
                System.Globalization.CultureInfo.InvariantCulture,
                out ushort codeUnit))
        {
            throw new ArgumentException($"Invalid unicode escape '\\u{hex}' at line {line}");
        }

        current += 4;
        return (char)codeUnit;
    }

    // Scans the rest of a number; the sign (if any) and first digit are already consumed.
    private void AddNumber()
    {
        bool isDouble = false;

        while (IsDigit(Peek()))
        {
            Advance();
        }

        if (Peek() == '.' && IsDigit(PeekNext()))
        {
            isDouble = true;
            Advance(); // Consume '.'
            while (IsDigit(Peek()))
            {
                Advance();
            }
        }

        // Optional exponent: e|E, optional sign, at least one digit.
        // If the shape does not match, nothing is consumed and the stray letter
        // is reported by the next ScanToken call.
        if (Peek() == 'e' || Peek() == 'E')
        {
            int digitsAt = current + 1;
            if (digitsAt < source.Length && (source[digitsAt] == '+' || source[digitsAt] == '-'))
            {
                digitsAt++;
            }

            if (digitsAt < source.Length && IsDigit(source[digitsAt]))
            {
                isDouble = true;
                current = digitsAt;
                while (IsDigit(Peek()))
                {
                    Advance();
                }
            }
        }

        ParseNumber(isDouble);
    }

    // Converts source[start..current) into a NUMBER token.
    private void ParseNumber(bool isDouble)
    {
        string numberStr = source.Substring(start, current - start);
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        if (isDouble)
        {
            if (double.TryParse(numberStr, FloatStyle, invariant, out double real))
            {
                tokens.Add(new Token(TokenType.NUMBER, real));
                return;
            }

            throw new ArgumentException($"Invalid float format '{numberStr}' at line {line}");
        }

        // Prefer int so existing consumers keep seeing the same boxed type,
        // then widen instead of rejecting large but valid JSON integers.
        if (int.TryParse(numberStr, IntegerStyle, invariant, out int small))
        {
            tokens.Add(new Token(TokenType.NUMBER, small));
            return;
        }

        if (long.TryParse(numberStr, IntegerStyle, invariant, out long large))
        {
            tokens.Add(new Token(TokenType.NUMBER, large));
            return;
        }

        if (double.TryParse(numberStr, FloatStyle, invariant, out double huge))
        {
            tokens.Add(new Token(TokenType.NUMBER, huge));
            return;
        }

        throw new ArgumentException($"Invalid integer format '{numberStr}' at line {line}");
    }

    // Returns the next unread character, or '\0' at end of input.
    private char Peek() => IsAtEnd() ? '\0' : source[current];

    // Returns the character after the next one, or '\0' when out of range.
    private char PeekNext() => (current + 1 >= source.Length) ? '\0' : source[current + 1];

    // Scans a run of letters and maps it to true / false / null.
    private void AddKeyword()
    {
        while (char.IsLetter(Peek()))
        {
            Advance();
        }

        string keyword = source.Substring(start, current - start);
        switch (keyword)
        {
            case "true":
                tokens.Add(new Token(TokenType.BOOLEAN, true));
                break;
            case "false":
                tokens.Add(new Token(TokenType.BOOLEAN, false));
                break;
            case "null":
                tokens.Add(new Token(TokenType.NULL, null));
                break;
            default:
                throw new ArgumentException($"Unexpected keyword '{keyword}' at line {line}");
        }
    }
}
3.实现解析类:
/// <summary>
/// Recursive-descent parser that turns a token list into plain .NET values.
/// </summary>
/// <remarks>
/// JSON objects become <c>Dictionary&lt;string, object&gt;</c>, arrays become
/// <c>List&lt;object&gt;</c>, and scalar tokens yield their <see cref="Token.Value"/>.
/// </remarks>
public class Parser
{
    private readonly List<Token> tokens;
    private int current; // index of the next token to read

    /// <summary>Creates a parser over <paramref name="tokens"/>.</summary>
    /// <param name="tokens">The token list to parse.</param>
    /// <exception cref="ArgumentNullException"><paramref name="tokens"/> is null.</exception>
    public Parser(List<Token> tokens)
    {
        this.tokens = tokens ?? throw new ArgumentNullException(nameof(tokens));
        current = 0;
    }

    /// <summary>
    /// Parses exactly one JSON value from the token list.
    /// </summary>
    /// <returns>The parsed value (dictionary, list, or scalar token value).</returns>
    /// <exception cref="ArgumentException">
    /// The tokens do not form a single well-formed JSON value.
    /// </exception>
    /// <exception cref="IndexOutOfRangeException">The token list is exhausted.</exception>
    public object Parse()
    {
        object result = ParseFromToken(Advance());

        // Anything left before EOF means the input held more than one value;
        // silently dropping it would hide malformed input from the caller.
        if (Peek().Type != TokenType.EOF)
        {
            throw new ArgumentException($"Unexpected token after JSON value: {Peek().Type}");
        }

        return result;
    }

    // Parses the value that starts with the already-consumed 'token'.
    private object ParseFromToken(Token token)
    {
        switch (token.Type)
        {
            case TokenType.STRING:
            case TokenType.NUMBER:
            case TokenType.BOOLEAN:
            case TokenType.NULL:
                return token.Value;
            case TokenType.LEFT_BRACE:
                return ParseObject();
            case TokenType.LEFT_BRACKET:
                return ParseArray();
            default:
                throw new ArgumentException($"Unexpected token type: {token.Type}", nameof(token));
        }
    }

    // Parses the members of an object; the opening '{' is already consumed.
    // A repeated key overwrites the earlier entry.
    private Dictionary<string, object> ParseObject()
    {
        var jsonObject = new Dictionary<string, object>();
        bool afterComma = false;

        Token keyToken = Advance();
        while (keyToken.Type != TokenType.RIGHT_BRACE || afterComma)
        {
            if (keyToken.Type == TokenType.EOF)
            {
                throw new ArgumentException("Unterminated JSON object");
            }
            if (keyToken.Type == TokenType.RIGHT_BRACE)
            {
                // Only reachable right after a comma: {"a": 1,}
                throw new ArgumentException("Trailing comma before end of JSON object");
            }
            if (keyToken.Type != TokenType.STRING)
            {
                throw new ArgumentException("JSON object fields must begin with a string key");
            }

            Consume(TokenType.COLON, "Key-value pairs must be separated by colon");
            Token valueToken = Advance();
            jsonObject[(string)keyToken.Value] = ParseFromToken(valueToken);

            afterComma = ConsumeCommaUnless(TokenType.RIGHT_BRACE);
            keyToken = Advance();
        }

        return jsonObject;
    }

    // Parses the elements of an array; the opening '[' is already consumed.
    private List<object> ParseArray()
    {
        var jsonArray = new List<object>();
        bool afterComma = false;

        Token token = Advance();
        while (token.Type != TokenType.RIGHT_BRACKET || afterComma)
        {
            if (token.Type == TokenType.EOF)
            {
                throw new ArgumentException("Unterminated JSON array");
            }
            if (token.Type == TokenType.RIGHT_BRACKET)
            {
                // Only reachable right after a comma: [1, 2,]
                throw new ArgumentException("Trailing comma before end of JSON array");
            }

            jsonArray.Add(ParseFromToken(token));

            afterComma = ConsumeCommaUnless(TokenType.RIGHT_BRACKET);
            token = Advance();
        }

        return jsonArray;
    }

    // Returns the next token and moves past it.
    private Token Advance()
    {
        if (current >= tokens.Count)
        {
            throw new IndexOutOfRangeException("Unexpected end of token stream");
        }

        return tokens[current++];
    }

    // Moves past the next token, which must be of 'expectedType'.
    private void Consume(TokenType expectedType, string errorMessage)
    {
        if (Peek().Type != expectedType)
        {
            throw new ArgumentException(
                $"{errorMessage}. Expected: {expectedType}, Actual: {Peek().Type}");
        }

        current++;
    }

    // Consumes a comma if one is next and reports whether it did. When no comma
    // is present the next token must be 'exceptionType' (the closing bracket),
    // which is left in place for the caller to read.
    private bool ConsumeCommaUnless(TokenType exceptionType)
    {
        if (Peek().Type == TokenType.COMMA)
        {
            current++;
            return true;
        }

        if (Peek().Type != exceptionType)
        {
            throw new ArgumentException($"Missing comma before token: {Peek().Type}");
        }

        return false;
    }

    // Returns the next token without consuming it; a synthetic EOF when exhausted.
    private Token Peek()
    {
        if (current >= tokens.Count)
        {
            return new Token(TokenType.EOF, null);
        }

        return tokens[current];
    }
}
4.测试类:
/// <summary>
/// Unity demo component: scans and parses a sample JSON document in
/// <c>Start</c> and logs the resulting structure with <c>Debug.Log</c>.
/// </summary>
public class JsonParser : MonoBehaviour
{
    string jsonString1 = @"{""name"": ""张三"",""age"": 28,""is_student"": false,""hobbies"": [""阅读"", ""游泳""],""address"": {""street"": ""人民路123号"",""city"": ""北京""}}";

    void Start()
    {
        var scanner = new Scanner(jsonString1);
        List<Token> tokens = scanner.Scan();
        var parser = new Parser(tokens);
        object parsed = parser.Parse();
        this.TranslateJsonObj(parsed);
    }

    // Logs a parsed value, dispatching on its runtime type.
    void TranslateJsonObj(object result)
    {
        switch (result)
        {
            case Dictionary<string, object> obj:
                HandleObject(obj);
                break;
            case List<object> arr:
                HandleArray(arr);
                break;
            case string s:
                Debug.Log($"字符串值: {s}");
                break;
            // The scanner boxes integers as int, so matching double alone
            // would silently skip integer values.
            case int i:
                Debug.Log($"数字值: {i}");
                break;
            case long l:
                Debug.Log($"数字值: {l}");
                break;
            case double d:
                Debug.Log($"数字值: {d}");
                break;
            case bool b:
                Debug.Log($"布尔值: {b}");
                break;
            case null:
                Debug.Log("空值");
                break;
        }
    }

    // True for the two container types produced by the parser.
    static bool IsContainer(object value)
    {
        return value is Dictionary<string, object> || value is List<object>;
    }

    // Logs each member of an object, recursing into nested containers.
    void HandleObject(Dictionary<string, object> obj)
    {
        foreach (var item in obj)
        {
            if (IsContainer(item.Value))
            {
                Debug.Log($"Key: {item.Key}");
                TranslateJsonObj(item.Value);
            }
            else if (item.Value == null)
            {
                // JSON null is stored as a null reference; calling GetType() on it would throw.
                Debug.Log($"Key: {item.Key} Value: null");
            }
            else
            {
                Debug.Log($"Key: {item.Key} Value: {item.Value}");
            }
        }
    }

    // Logs each element of an array, recursing into nested containers so that
    // their contents are printed instead of just their type name.
    void HandleArray(List<object> arr)
    {
        foreach (var item in arr)
        {
            if (IsContainer(item))
            {
                TranslateJsonObj(item);
            }
            else
            {
                Debug.Log($"object: {item}");
            }
        }
    }
}
结果:
参考链接:
Implement a JSON parser from scratch in 10 minutes (Python) (youtube.com)
Build Your Own JSON Parser | codingchallenges.fyi (youtube.com)
JSON Parser in C | Lexer (youtube.com)
Let's Build a Superfast JSON Parser from scratch in C# - Part 1 The Lexer (youtube.com)
Let's Build a Superfast JSON Parser from scratch in C# Part 2 The Parser and Deserialization (youtube.com)
JSON - 维基百科,自由的百科全书 (wikipedia.org)