Files
nub-lang/compiler/Parser.cs
nub31 6ae10d5f90 ...
2026-02-10 19:50:55 +01:00

759 lines
24 KiB
C#

using System.Diagnostics.CodeAnalysis;
namespace Compiler;
public sealed class Parser(string fileName, List<Token> tokens)
{
/// <summary>
/// Parses a token list into an <see cref="Ast"/> using a fresh <see cref="Parser"/> instance.
/// </summary>
/// <param name="fileName">File name attached to diagnostics and to the resulting tree.</param>
/// <param name="tokens">Tokens to parse.</param>
/// <param name="diagnostics">Receives the diagnostics produced while parsing.</param>
/// <returns>The parsed tree, or <c>null</c> when parsing failed.</returns>
public static Ast? Parse(string fileName, List<Token> tokens, out List<Diagnostic> diagnostics)
    => new Parser(fileName, tokens).Parse(out diagnostics);
// Position in `tokens` of the next token to be consumed; read by Peek and advanced by Next.
private int index;
/// <summary>
/// Parses the whole token list: a <c>module</c> header followed by definitions until the tokens run out.
/// </summary>
/// <param name="diagnostics">
/// Receives the diagnostic of the first <see cref="CompileException"/> raised; parsing stops at that point.
/// </param>
/// <returns>
/// The parsed tree, or <c>null</c> when no module name was read or an error diagnostic was recorded.
/// </returns>
private Ast? Parse(out List<Diagnostic> diagnostics)
{
    diagnostics = [];
    var parsedDefinitions = new List<NodeDefinition>();
    TokenIdent? module = null;

    try
    {
        ExpectKeyword(Keyword.Module);
        module = ExpectIdent();

        while (Peek() != null)
            parsedDefinitions.Add(ParseDefinition());
    }
    catch (CompileException failure)
    {
        diagnostics.Add(failure.Diagnostic);
    }

    if (module == null)
        return null;

    if (diagnostics.Any(d => d.Severity == DiagnosticSeverity.Error))
        return null;

    return new Ast(fileName, module, parsedDefinitions);
}
/// <summary>
/// Parses one definition: either <c>func name(param: type ...): returnType body</c>
/// or <c>struct name { field: type ... }</c>.
/// </summary>
/// <returns>The parsed function or struct definition.</returns>
/// <exception cref="CompileException">The next token starts neither a function nor a struct.</exception>
private NodeDefinition ParseDefinition()
{
    var definitionStart = index;

    if (TryExpectKeyword(Keyword.Func))
        return ParseFuncDefinition(definitionStart);

    if (TryExpectKeyword(Keyword.Struct))
        return ParseStructDefinition(definitionStart);

    throw new CompileException(Diagnostic.Error("Not a valid definition").At(fileName, Peek()).Build());
}

// Parses the remainder of a function definition; the `func` keyword has already been consumed.
private NodeDefinitionFunc ParseFuncDefinition(int definitionStart)
{
    var funcName = ExpectIdent();
    var funcParams = new List<NodeDefinitionFunc.Param>();

    ExpectSymbol(Symbol.OpenParen);
    while (!TryExpectSymbol(Symbol.CloseParen))
    {
        var paramStart = index;
        var paramName = ExpectIdent();
        ExpectSymbol(Symbol.Colon);
        var paramType = ParseType();
        funcParams.Add(new NodeDefinitionFunc.Param(TokensFrom(paramStart), paramName, paramType));
    }

    ExpectSymbol(Symbol.Colon);
    var resultType = ParseType();
    var funcBody = ParseStatement();

    return new NodeDefinitionFunc(TokensFrom(definitionStart), funcName, funcParams, funcBody, resultType);
}

// Parses the remainder of a struct definition; the `struct` keyword has already been consumed.
private NodeDefinitionStruct ParseStructDefinition(int definitionStart)
{
    var structName = ExpectIdent();
    var structFields = new List<NodeDefinitionStruct.Field>();

    ExpectSymbol(Symbol.OpenCurly);
    while (!TryExpectSymbol(Symbol.CloseCurly))
    {
        var memberStart = index;
        var memberName = ExpectIdent();
        ExpectSymbol(Symbol.Colon);
        var memberType = ParseType();
        structFields.Add(new NodeDefinitionStruct.Field(TokensFrom(memberStart), memberName, memberType));
    }

    return new NodeDefinitionStruct(TokensFrom(definitionStart), structName, structFields);
}
/// <summary>
/// Parses one statement: a <c>{ ... }</c> block, <c>return</c>, <c>let</c>, <c>if</c>/<c>else</c>,
/// <c>while</c>, an assignment (<c>target = value</c>) or a bare expression.
/// </summary>
/// <returns>The parsed statement node.</returns>
private NodeStatement ParseStatement()
{
    var statementStart = index;

    if (TryExpectSymbol(Symbol.OpenCurly))
    {
        var children = new List<NodeStatement>();
        while (!TryExpectSymbol(Symbol.CloseCurly))
        {
            children.Add(ParseStatement());
        }

        return new NodeStatementBlock(TokensFrom(statementStart), children);
    }

    if (TryExpectKeyword(Keyword.Return))
    {
        var returned = ParseExpression();
        return new NodeStatementReturn(TokensFrom(statementStart), returned);
    }

    if (TryExpectKeyword(Keyword.Let))
    {
        var variableName = ExpectIdent();
        ExpectSymbol(Symbol.Colon);
        var variableType = ParseType();
        ExpectSymbol(Symbol.Equal);
        var initialValue = ParseExpression();
        return new NodeStatementVariableDeclaration(TokensFrom(statementStart), variableName, variableType, initialValue);
    }

    if (TryExpectKeyword(Keyword.If))
    {
        var test = ParseExpression();
        var whenTrue = ParseStatement();
        // The else branch is optional and stays null when no `else` keyword follows.
        NodeStatement? whenFalse = TryExpectKeyword(Keyword.Else) ? ParseStatement() : null;
        return new NodeStatementIf(TokensFrom(statementStart), test, whenTrue, whenFalse);
    }

    if (TryExpectKeyword(Keyword.While))
    {
        var test = ParseExpression();
        var loopBody = ParseStatement();
        return new NodeStatementWhile(TokensFrom(statementStart), test, loopBody);
    }

    // Anything else starts with an expression; a following `=` turns it into an assignment.
    var expression = ParseExpression();
    if (!TryExpectSymbol(Symbol.Equal))
        return new NodeStatementExpression(TokensFrom(statementStart), expression);

    var assigned = ParseExpression();
    return new NodeStatementAssignment(TokensFrom(statementStart), expression, assigned);
}
/// <summary>
/// Parses a binary expression by precedence climbing.
/// </summary>
/// <param name="minPrecedence">
/// Lowest operator precedence this call may consume; the default of -1 accepts every operator.
/// </param>
/// <returns>The parsed expression; a lone leaf when no acceptable operator follows it.</returns>
private NodeExpression ParseExpression(int minPrecedence = -1)
{
    var expressionStart = index;
    var result = ParseExpressionLeaf();

    while (true)
    {
        if (!TryPeekBinaryOperator(out var binaryOp))
            break;

        var precedence = GetPrecedence(binaryOp);
        if (precedence < minPrecedence)
            break;

        Next();
        // The right side only takes strictly tighter operators, so equal precedence groups to the left.
        var rhs = ParseExpression(precedence + 1);
        result = new NodeExpressionBinary(TokensFrom(expressionStart), result, binaryOp, rhs);
    }

    return result;
}
/// <summary>
/// Returns the precedence of a binary operator; operators with a larger value are grouped first
/// by <see cref="ParseExpression"/>.
/// </summary>
/// <param name="operation">The operator to look up.</param>
/// <returns>The precedence level, from 2 (logical or) up to 10 (multiplicative).</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="operation"/> has no entry.</exception>
private static int GetPrecedence(NodeExpressionBinary.Op operation)
{
    switch (operation)
    {
        case NodeExpressionBinary.Op.Multiply:
        case NodeExpressionBinary.Op.Divide:
        case NodeExpressionBinary.Op.Modulo:
            return 10;

        case NodeExpressionBinary.Op.Add:
        case NodeExpressionBinary.Op.Subtract:
            return 9;

        case NodeExpressionBinary.Op.LeftShift:
        case NodeExpressionBinary.Op.RightShift:
            return 8;

        case NodeExpressionBinary.Op.GreaterThan:
        case NodeExpressionBinary.Op.GreaterThanOrEqual:
        case NodeExpressionBinary.Op.LessThan:
        case NodeExpressionBinary.Op.LessThanOrEqual:
        case NodeExpressionBinary.Op.Equal:
        case NodeExpressionBinary.Op.NotEqual:
            return 7;

        // Levels 6, 5 and 4 are held for BitwiseAnd, BitwiseXor and BitwiseOr, which are not enabled.

        case NodeExpressionBinary.Op.LogicalAnd:
            return 3;

        case NodeExpressionBinary.Op.LogicalOr:
            return 2;

        default:
            throw new ArgumentOutOfRangeException(nameof(operation), operation, null);
    }
}
/// <summary>
/// Parses a primary expression followed by any number of postfix operations.
/// </summary>
/// <remarks>
/// Primary forms: a parenthesised expression, unary <c>-</c> / <c>!</c>, int, string and bool literals,
/// a local identifier, a <c>module::name</c> identifier, and a struct literal
/// (<c>struct module::name { field = value ... }</c>).
/// Postfix forms, applied left to right: member access (<c>.name</c>) and call (<c>(args ...)</c>).
/// </remarks>
/// <returns>The parsed expression node.</returns>
/// <exception cref="CompileException">
/// The next token cannot start an expression, or a required token is missing.
/// </exception>
private NodeExpression ParseExpressionLeaf()
{
    var startIndex = index;
    NodeExpression expr;

    if (TryExpectSymbol(Symbol.OpenParen))
    {
        // The inner expression is used directly; parentheses do not get a node of their own.
        expr = ParseExpression();
        ExpectSymbol(Symbol.CloseParen);
    }
    else if (TryExpectSymbol(Symbol.Minus))
    {
        // Parse only a leaf as the operand so the unary operator binds tighter than every
        // binary operator: "-a + b" is "(-a) + b", not "-(a + b)".
        var target = ParseExpressionLeaf();
        expr = new NodeExpressionUnary(TokensFrom(startIndex), target, NodeExpressionUnary.Op.Negate);
    }
    else if (TryExpectSymbol(Symbol.Bang))
    {
        // Same reasoning as for unary minus: "!a && b" is "(!a) && b".
        var target = ParseExpressionLeaf();
        expr = new NodeExpressionUnary(TokensFrom(startIndex), target, NodeExpressionUnary.Op.Invert);
    }
    else if (TryExpectIntLiteral(out var intLiteral))
    {
        expr = new NodeExpressionIntLiteral(TokensFrom(startIndex), intLiteral);
    }
    else if (TryExpectStringLiteral(out var stringLiteral))
    {
        expr = new NodeExpressionStringLiteral(TokensFrom(startIndex), stringLiteral);
    }
    else if (TryExpectBoolLiteral(out var boolLiteral))
    {
        expr = new NodeExpressionBoolLiteral(TokensFrom(startIndex), boolLiteral);
    }
    else if (TryExpectIdent(out var ident))
    {
        if (TryExpectSymbol(Symbol.ColonColon))
        {
            // "module::name": the first identifier is the module, the second the referenced name.
            var name = ExpectIdent();
            expr = new NodeExpressionModuleIdent(TokensFrom(startIndex), ident, name);
        }
        else
        {
            expr = new NodeExpressionLocalIdent(TokensFrom(startIndex), ident);
        }
    }
    else if (TryExpectKeyword(Keyword.Struct))
    {
        var module = ExpectIdent();
        ExpectSymbol(Symbol.ColonColon);
        var name = ExpectIdent();

        var initializers = new List<NodeExpressionStructLiteral.Initializer>();
        ExpectSymbol(Symbol.OpenCurly);
        while (!TryExpectSymbol(Symbol.CloseCurly))
        {
            // Each initializer's token span starts at its own field name, not at the start of the
            // enclosing struct literal.
            var initializerStartIndex = index;
            var fieldName = ExpectIdent();
            ExpectSymbol(Symbol.Equal);
            var fieldValue = ParseExpression();
            initializers.Add(new NodeExpressionStructLiteral.Initializer(TokensFrom(initializerStartIndex), fieldName, fieldValue));
        }

        expr = new NodeExpressionStructLiteral(TokensFrom(startIndex), module, name, initializers);
    }
    else
    {
        throw new CompileException(Diagnostic.Error("Expected start of expression").At(fileName, Peek()).Build());
    }

    // Postfix loop: keep wrapping the expression while member accesses or calls follow.
    while (true)
    {
        if (TryExpectSymbol(Symbol.Period))
        {
            var name = ExpectIdent();
            expr = new NodeExpressionMemberAccess(TokensFrom(startIndex), expr, name);
        }
        else if (TryExpectSymbol(Symbol.OpenParen))
        {
            // Arguments are parsed back to back until the closing parenthesis.
            var parameters = new List<NodeExpression>();
            while (!TryExpectSymbol(Symbol.CloseParen))
            {
                parameters.Add(ParseExpression());
            }

            expr = new NodeExpressionFuncCall(TokensFrom(startIndex), expr, parameters);
        }
        else
        {
            break;
        }
    }

    return expr;
}
/// <summary>
/// Parses a type: a pointer (<c>^type</c>), a function type (<c>func(type ...): returnType</c>),
/// a built-in type name, or a custom <c>module::name</c> type.
/// </summary>
/// <returns>The parsed type node.</returns>
/// <exception cref="CompileException">The next token cannot start a type.</exception>
private NodeType ParseType()
{
    var typeStart = index;

    if (TryExpectSymbol(Symbol.Caret))
    {
        var pointee = ParseType();
        return new NodeTypePointer(TokensFrom(typeStart), pointee);
    }

    if (TryExpectKeyword(Keyword.Func))
    {
        var parameterTypes = new List<NodeType>();
        ExpectSymbol(Symbol.OpenParen);
        while (!TryExpectSymbol(Symbol.CloseParen))
            parameterTypes.Add(ParseType());

        ExpectSymbol(Symbol.Colon);
        var resultType = ParseType();
        return new NodeTypeFunc(TokensFrom(typeStart), parameterTypes, resultType);
    }

    if (!TryExpectIdent(out var ident))
        throw new CompileException(Diagnostic.Error("Expected type").At(fileName, Peek()).Build());

    // Built-in type names map straight to their node; anything else yields null here.
    NodeType? builtin = ident.Ident switch
    {
        "void" => new NodeTypeVoid(TokensFrom(typeStart)),
        "string" => new NodeTypeString(TokensFrom(typeStart)),
        "bool" => new NodeTypeBool(TokensFrom(typeStart)),
        "i8" => new NodeTypeSInt(TokensFrom(typeStart), 8),
        "i16" => new NodeTypeSInt(TokensFrom(typeStart), 16),
        "i32" => new NodeTypeSInt(TokensFrom(typeStart), 32),
        "i64" => new NodeTypeSInt(TokensFrom(typeStart), 64),
        "u8" => new NodeTypeUInt(TokensFrom(typeStart), 8),
        "u16" => new NodeTypeUInt(TokensFrom(typeStart), 16),
        "u32" => new NodeTypeUInt(TokensFrom(typeStart), 32),
        "u64" => new NodeTypeUInt(TokensFrom(typeStart), 64),
        _ => null,
    };
    if (builtin != null)
        return builtin;

    // Any other identifier is read as the module part of "module::name".
    ExpectSymbol(Symbol.ColonColon);
    var typeName = ExpectIdent();
    return new NodeTypeCustom(TokensFrom(typeStart), ident, typeName);
}
/// <summary>
/// Copies the tokens consumed since <paramref name="startIndex"/>, up to but not including the
/// current position.
/// </summary>
/// <param name="startIndex">Index of the first token to include.</param>
/// <returns>A new list holding the tokens in that range.</returns>
private List<Token> TokensFrom(int startIndex)
{
    var consumed = index - startIndex;
    return tokens.GetRange(startIndex, consumed);
}
/// <summary>
/// Consumes the next token, which must be the given keyword.
/// </summary>
/// <param name="keyword">The keyword that has to come next.</param>
/// <exception cref="CompileException">The next token is not that keyword.</exception>
private void ExpectKeyword(Keyword keyword)
{
    if (TryExpectKeyword(keyword))
        return;

    throw new CompileException(Diagnostic.Error($"Expected '{keyword.AsString()}'").At(fileName, Peek()).Build());
}
/// <summary>
/// Consumes the next token only if it is the given keyword.
/// </summary>
/// <param name="keyword">The keyword to look for.</param>
/// <returns><c>true</c> when the keyword was consumed; <c>false</c> when nothing was consumed.</returns>
private bool TryExpectKeyword(Keyword keyword)
{
    var matches = Peek() is TokenKeyword candidate && candidate.Keyword == keyword;
    if (!matches)
        return false;

    Next();
    return true;
}
/// <summary>
/// Consumes the next token, which must be the given symbol.
/// </summary>
/// <param name="symbol">The symbol that has to come next.</param>
/// <exception cref="CompileException">The next token is not that symbol.</exception>
private void ExpectSymbol(Symbol symbol)
{
    if (TryExpectSymbol(symbol))
        return;

    throw new CompileException(Diagnostic.Error($"Expected '{symbol.AsString()}'").At(fileName, Peek()).Build());
}
/// <summary>
/// Consumes the next token only if it is the given symbol.
/// </summary>
/// <param name="symbol">The symbol to look for.</param>
/// <returns><c>true</c> when the symbol was consumed; <c>false</c> when nothing was consumed.</returns>
private bool TryExpectSymbol(Symbol symbol)
{
    var matches = Peek() is TokenSymbol candidate && candidate.Symbol == symbol;
    if (!matches)
        return false;

    Next();
    return true;
}
/// <summary>
/// Consumes the next token, which must be an identifier.
/// </summary>
/// <returns>The consumed identifier token.</returns>
/// <exception cref="CompileException">The next token is not an identifier.</exception>
private TokenIdent ExpectIdent()
{
    if (TryExpectIdent(out var found))
        return found;

    throw new CompileException(Diagnostic.Error("Expected identifier").At(fileName, Peek()).Build());
}
/// <summary>
/// Consumes the next token only if it is an identifier.
/// </summary>
/// <param name="ident">The consumed identifier, or <c>null</c> when the next token is not one.</param>
/// <returns><c>true</c> when an identifier was consumed; otherwise <c>false</c>.</returns>
private bool TryExpectIdent([NotNullWhen(true)] out TokenIdent? ident)
{
    ident = Peek() as TokenIdent;
    if (ident is null)
        return false;

    Next();
    return true;
}
/// <summary>
/// Consumes the next token only if it is an integer literal.
/// </summary>
/// <param name="intLiteral">The consumed literal, or <c>null</c> when the next token is not one.</param>
/// <returns><c>true</c> when a literal was consumed; otherwise <c>false</c>.</returns>
private bool TryExpectIntLiteral([NotNullWhen(true)] out TokenIntLiteral? intLiteral)
{
    intLiteral = Peek() as TokenIntLiteral;
    if (intLiteral is null)
        return false;

    Next();
    return true;
}
/// <summary>
/// Consumes the next token only if it is a string literal.
/// </summary>
/// <param name="stringLiteral">The consumed literal, or <c>null</c> when the next token is not one.</param>
/// <returns><c>true</c> when a literal was consumed; otherwise <c>false</c>.</returns>
private bool TryExpectStringLiteral([NotNullWhen(true)] out TokenStringLiteral? stringLiteral)
{
    stringLiteral = Peek() as TokenStringLiteral;
    if (stringLiteral is null)
        return false;

    Next();
    return true;
}
/// <summary>
/// Consumes the next token only if it is a boolean literal.
/// </summary>
/// <param name="boolLiteral">The consumed literal, or <c>null</c> when the next token is not one.</param>
/// <returns><c>true</c> when a literal was consumed; otherwise <c>false</c>.</returns>
private bool TryExpectBoolLiteral([NotNullWhen(true)] out TokenBoolLiteral? boolLiteral)
{
    boolLiteral = Peek() as TokenBoolLiteral;
    if (boolLiteral is null)
        return false;

    Next();
    return true;
}
/// <summary>
/// Advances the current position by one token.
/// </summary>
/// <exception cref="CompileException">The position is already at or past the end of the token list.</exception>
private void Next()
{
    if (index < tokens.Count)
    {
        index += 1;
        return;
    }

    throw new CompileException(Diagnostic.Error("Unexpected end of tokens").At(fileName, Peek()).Build());
}
/// <summary>
/// Returns the token at the current position plus <paramref name="offset"/> without consuming it.
/// </summary>
/// <param name="offset">Number of tokens to look ahead; 0 is the next unconsumed token.</param>
/// <returns>The token at that position, or <c>null</c> when it lies past the end of the list.</returns>
private Token? Peek(int offset = 0)
{
    var position = index + offset;
    return position >= tokens.Count ? null : tokens[position];
}
/// <summary>
/// Checks whether the next token is a symbol that denotes a binary operator. Nothing is consumed.
/// </summary>
/// <param name="op">The matching operator, or the enum's default value when there is no match.</param>
/// <returns><c>true</c> when the next token maps to a binary operator; otherwise <c>false</c>.</returns>
private bool TryPeekBinaryOperator(out NodeExpressionBinary.Op op)
{
    op = default;

    if (Peek() is not TokenSymbol symbolToken)
        return false;

    NodeExpressionBinary.Op? mapped = symbolToken.Symbol switch
    {
        Symbol.Plus => NodeExpressionBinary.Op.Add,
        Symbol.Minus => NodeExpressionBinary.Op.Subtract,
        Symbol.Star => NodeExpressionBinary.Op.Multiply,
        Symbol.ForwardSlash => NodeExpressionBinary.Op.Divide,
        Symbol.Percent => NodeExpressionBinary.Op.Modulo,
        Symbol.BangEqual => NodeExpressionBinary.Op.NotEqual,
        Symbol.EqualEqual => NodeExpressionBinary.Op.Equal,
        Symbol.LessThan => NodeExpressionBinary.Op.LessThan,
        Symbol.LessThanEqual => NodeExpressionBinary.Op.LessThanOrEqual,
        Symbol.GreaterThan => NodeExpressionBinary.Op.GreaterThan,
        Symbol.GreaterThanEqual => NodeExpressionBinary.Op.GreaterThanOrEqual,
        Symbol.LessThanLessThan => NodeExpressionBinary.Op.LeftShift,
        Symbol.GreaterThanGreaterThan => NodeExpressionBinary.Op.RightShift,
        Symbol.AmpersandAmpersand => NodeExpressionBinary.Op.LogicalAnd,
        Symbol.PipePipe => NodeExpressionBinary.Op.LogicalOr,
        _ => null,
    };

    if (!mapped.HasValue)
        return false;

    op = mapped.Value;
    return true;
}
}
/// <summary>
/// Result of parsing one file: its name, the declared module name and the parsed definitions.
/// </summary>
public sealed class Ast(string fileName, TokenIdent moduleName, List<NodeDefinition> definitions)
{
    /// <summary>Name of the file the tree was parsed from.</summary>
    public string FileName => fileName;

    /// <summary>Identifier that follows the <c>module</c> keyword.</summary>
    public TokenIdent ModuleName => moduleName;

    /// <summary>Definitions in the order they were parsed.</summary>
    public List<NodeDefinition> Definitions => definitions;
}
/// <summary>
/// Base class of every syntax tree node; carries the tokens the node was built from.
/// </summary>
public abstract class Node(List<Token> tokens)
{
    /// <summary>Tokens handed to the node when it was created.</summary>
    public List<Token> Tokens => tokens;
}
/// <summary>Base class for definition nodes (functions and structs).</summary>
public abstract class NodeDefinition(List<Token> tokens) : Node(tokens);
/// <summary>
/// Function definition: name, parameters, return type and body.
/// </summary>
public sealed class NodeDefinitionFunc(List<Token> tokens, TokenIdent name, List<NodeDefinitionFunc.Param> parameters, NodeStatement body, NodeType returnType) : NodeDefinition(tokens)
{
    /// <summary>Function name.</summary>
    public TokenIdent Name => name;

    /// <summary>Declared parameters, in source order.</summary>
    public List<Param> Parameters => parameters;

    /// <summary>Statement that forms the function body.</summary>
    public NodeStatement Body => body;

    /// <summary>Declared return type.</summary>
    public NodeType ReturnType => returnType;

    /// <summary>
    /// A single <c>name: type</c> parameter of a function definition.
    /// </summary>
    public sealed class Param(List<Token> tokens, TokenIdent name, NodeType type) : Node(tokens)
    {
        /// <summary>Parameter name.</summary>
        public TokenIdent Name => name;

        /// <summary>Parameter type.</summary>
        public NodeType Type => type;
    }
}
/// <summary>
/// Struct definition: name and fields.
/// </summary>
public sealed class NodeDefinitionStruct(List<Token> tokens, TokenIdent name, List<NodeDefinitionStruct.Field> fields) : NodeDefinition(tokens)
{
    /// <summary>Struct name.</summary>
    public TokenIdent Name => name;

    /// <summary>Declared fields, in source order.</summary>
    public List<Field> Fields => fields;

    /// <summary>
    /// A single <c>name: type</c> field of a struct definition.
    /// </summary>
    public sealed class Field(List<Token> tokens, TokenIdent name, NodeType type) : Node(tokens)
    {
        /// <summary>Field name.</summary>
        public TokenIdent Name => name;

        /// <summary>Field type.</summary>
        public NodeType Type => type;
    }
}
/// <summary>Base class for statement nodes.</summary>
public abstract class NodeStatement(List<Token> tokens) : Node(tokens);
/// <summary>
/// A <c>{ ... }</c> block holding a sequence of statements.
/// </summary>
public sealed class NodeStatementBlock(List<Token> tokens, List<NodeStatement> statements) : NodeStatement(tokens)
{
    /// <summary>Statements inside the block, in source order.</summary>
    public List<NodeStatement> Statements => statements;
}
/// <summary>
/// A statement that consists of a single expression.
/// </summary>
public sealed class NodeStatementExpression(List<Token> tokens, NodeExpression expression) : NodeStatement(tokens)
{
    /// <summary>The wrapped expression.</summary>
    public NodeExpression Expression => expression;
}
/// <summary>
/// A <c>return</c> statement with its value expression.
/// </summary>
public sealed class NodeStatementReturn(List<Token> tokens, NodeExpression value) : NodeStatement(tokens)
{
    /// <summary>Expression following the <c>return</c> keyword.</summary>
    public NodeExpression Value => value;
}
/// <summary>
/// A <c>let name: type = value</c> variable declaration.
/// </summary>
public sealed class NodeStatementVariableDeclaration(List<Token> tokens, TokenIdent name, NodeType type, NodeExpression value) : NodeStatement(tokens)
{
    /// <summary>Variable name.</summary>
    public TokenIdent Name => name;

    /// <summary>Declared variable type.</summary>
    public NodeType Type => type;

    /// <summary>Initial value expression.</summary>
    public NodeExpression Value => value;
}
/// <summary>
/// An assignment of the form <c>target = value</c>.
/// </summary>
public sealed class NodeStatementAssignment(List<Token> tokens, NodeExpression target, NodeExpression value) : NodeStatement(tokens)
{
    /// <summary>Expression on the left of <c>=</c>.</summary>
    public NodeExpression Target => target;

    /// <summary>Expression on the right of <c>=</c>.</summary>
    public NodeExpression Value => value;
}
/// <summary>
/// An <c>if</c> statement with an optional <c>else</c> branch.
/// </summary>
public sealed class NodeStatementIf(List<Token> tokens, NodeExpression condition, NodeStatement thenBlock, NodeStatement? elseBlock) : NodeStatement(tokens)
{
    /// <summary>Condition expression.</summary>
    public NodeExpression Condition => condition;

    /// <summary>Statement that follows the condition.</summary>
    public NodeStatement ThenBlock => thenBlock;

    /// <summary>Statement that follows <c>else</c>, or <c>null</c> when there is no <c>else</c>.</summary>
    public NodeStatement? ElseBlock => elseBlock;
}
/// <summary>
/// A <c>while</c> loop: condition and body.
/// </summary>
public sealed class NodeStatementWhile(List<Token> tokens, NodeExpression condition, NodeStatement block) : NodeStatement(tokens)
{
    /// <summary>Loop condition expression.</summary>
    public NodeExpression Condition => condition;

    /// <summary>Statement that forms the loop body.</summary>
    public NodeStatement Block => block;
}
/// <summary>Base class for expression nodes.</summary>
public abstract class NodeExpression(List<Token> tokens) : Node(tokens);
/// <summary>
/// An integer literal expression.
/// </summary>
public sealed class NodeExpressionIntLiteral(List<Token> tokens, TokenIntLiteral value) : NodeExpression(tokens)
{
    /// <summary>The literal token.</summary>
    public TokenIntLiteral Value => value;
}
/// <summary>
/// A string literal expression.
/// </summary>
public sealed class NodeExpressionStringLiteral(List<Token> tokens, TokenStringLiteral value) : NodeExpression(tokens)
{
    /// <summary>The literal token.</summary>
    public TokenStringLiteral Value => value;
}
/// <summary>
/// A boolean literal expression.
/// </summary>
public sealed class NodeExpressionBoolLiteral(List<Token> tokens, TokenBoolLiteral value) : NodeExpression(tokens)
{
    /// <summary>The literal token.</summary>
    public TokenBoolLiteral Value => value;
}
/// <summary>
/// A struct literal: <c>struct module::name { field = value ... }</c>.
/// </summary>
public sealed class NodeExpressionStructLiteral(List<Token> tokens, TokenIdent module, TokenIdent name, List<NodeExpressionStructLiteral.Initializer> initializers) : NodeExpression(tokens)
{
    /// <summary>Identifier before <c>::</c>.</summary>
    public TokenIdent Module => module;

    /// <summary>Identifier after <c>::</c>.</summary>
    public TokenIdent Name => name;

    /// <summary>Field initializers, in source order.</summary>
    public List<Initializer> Initializers => initializers;

    /// <summary>
    /// A single <c>field = value</c> entry of a struct literal.
    /// </summary>
    public sealed class Initializer(List<Token> tokens, TokenIdent name, NodeExpression value) : Node(tokens)
    {
        /// <summary>Name of the field being initialized.</summary>
        public TokenIdent Name => name;

        /// <summary>Expression assigned to the field.</summary>
        public NodeExpression Value => value;
    }
}
/// <summary>
/// A member access of the form <c>target.name</c>.
/// </summary>
public sealed class NodeExpressionMemberAccess(List<Token> tokens, NodeExpression target, TokenIdent name) : NodeExpression(tokens)
{
    /// <summary>Expression before the period.</summary>
    public NodeExpression Target => target;

    /// <summary>Member name after the period.</summary>
    public TokenIdent Name => name;
}
/// <summary>
/// A call expression: a target followed by parenthesised arguments.
/// </summary>
public sealed class NodeExpressionFuncCall(List<Token> tokens, NodeExpression target, List<NodeExpression> parameters) : NodeExpression(tokens)
{
    /// <summary>Expression being called.</summary>
    public NodeExpression Target => target;

    /// <summary>Argument expressions, in source order.</summary>
    public List<NodeExpression> Parameters => parameters;
}
/// <summary>
/// An identifier expression without a module qualifier.
/// </summary>
public sealed class NodeExpressionLocalIdent(List<Token> tokens, TokenIdent value) : NodeExpression(tokens)
{
    /// <summary>The identifier token.</summary>
    public TokenIdent Value => value;
}
/// <summary>
/// A module-qualified identifier expression: <c>module::value</c>.
/// </summary>
public sealed class NodeExpressionModuleIdent(List<Token> tokens, TokenIdent module, TokenIdent value) : NodeExpression(tokens)
{
    /// <summary>Identifier before <c>::</c>.</summary>
    public TokenIdent Module => module;

    /// <summary>Identifier after <c>::</c>.</summary>
    public TokenIdent Value => value;
}
/// <summary>
/// A binary expression: left operand, operator, right operand.
/// </summary>
public sealed class NodeExpressionBinary(List<Token> tokens, NodeExpression left, NodeExpressionBinary.Op operation, NodeExpression right) : NodeExpression(tokens)
{
    /// <summary>Left operand.</summary>
    public NodeExpression Left => left;

    /// <summary>Operator applied to the operands.</summary>
    public Op Operation => operation;

    /// <summary>Right operand.</summary>
    public NodeExpression Right => right;

    /// <summary>Binary operators.</summary>
    public enum Op
    {
        // Arithmetic
        Add,
        Subtract,
        Multiply,
        Divide,
        Modulo,

        // Comparison
        Equal,
        NotEqual,
        LessThan,
        LessThanOrEqual,
        GreaterThan,
        GreaterThanOrEqual,

        // Shifts
        LeftShift,
        RightShift,

        // Not enabled:
        // BitwiseAnd,
        // BitwiseXor,
        // BitwiseOr,

        // Logical
        LogicalAnd,
        LogicalOr,
    }
}
/// <summary>
/// A unary expression: an operator applied to one operand.
/// </summary>
public sealed class NodeExpressionUnary(List<Token> tokens, NodeExpression target, NodeExpressionUnary.Op op) : NodeExpression(tokens)
{
    /// <summary>Operand of the operator.</summary>
    public NodeExpression Target => target;

    /// <summary>Operator applied to the operand.</summary>
    public Op Operation => op;

    /// <summary>Unary operators.</summary>
    public enum Op
    {
        /// <summary>Created by the parser for a leading <c>-</c>.</summary>
        Negate,

        /// <summary>Created by the parser for a leading <c>!</c>.</summary>
        Invert,
    }
}
/// <summary>Base class for type nodes.</summary>
public abstract class NodeType(List<Token> tokens) : Node(tokens);
/// <summary>Type node the parser creates for the type name <c>void</c>.</summary>
public sealed class NodeTypeVoid(List<Token> tokens) : NodeType(tokens);
/// <summary>
/// Type node the parser creates for <c>u8</c>, <c>u16</c>, <c>u32</c> and <c>u64</c>.
/// </summary>
public sealed class NodeTypeUInt(List<Token> tokens, int width) : NodeType(tokens)
{
    /// <summary>The number taken from the type name (8, 16, 32 or 64 when created by the parser).</summary>
    public int Width => width;
}
/// <summary>
/// Type node the parser creates for <c>i8</c>, <c>i16</c>, <c>i32</c> and <c>i64</c>.
/// </summary>
public sealed class NodeTypeSInt(List<Token> tokens, int width) : NodeType(tokens)
{
    /// <summary>The number taken from the type name (8, 16, 32 or 64 when created by the parser).</summary>
    public int Width => width;
}
/// <summary>Type node the parser creates for the type name <c>bool</c>.</summary>
public sealed class NodeTypeBool(List<Token> tokens) : NodeType(tokens);
/// <summary>Type node the parser creates for the type name <c>string</c>.</summary>
public sealed class NodeTypeString(List<Token> tokens) : NodeType(tokens);
/// <summary>
/// A module-qualified type reference: <c>module::name</c>.
/// </summary>
public sealed class NodeTypeCustom(List<Token> tokens, TokenIdent module, TokenIdent name) : NodeType(tokens)
{
    /// <summary>Identifier before <c>::</c>.</summary>
    public TokenIdent Module => module;

    /// <summary>Identifier after <c>::</c>.</summary>
    public TokenIdent Name => name;
}
/// <summary>
/// A pointer type, written <c>^type</c>.
/// </summary>
public sealed class NodeTypePointer(List<Token> tokens, NodeType to) : NodeType(tokens)
{
    /// <summary>The type that follows the caret.</summary>
    public NodeType To => to;
}
/// <summary>
/// A function type, written <c>func(type ...): returnType</c>.
/// </summary>
public sealed class NodeTypeFunc(List<Token> tokens, List<NodeType> parameters, NodeType returnType) : NodeType(tokens)
{
    /// <summary>Parameter types, in source order.</summary>
    public List<NodeType> Parameters => parameters;

    /// <summary>Return type after the colon.</summary>
    public NodeType ReturnType => returnType;
}