This commit is contained in:
nub31
2025-10-25 18:07:34 +02:00
parent 3f18aa4782
commit 396ddf93a2
18 changed files with 951 additions and 598 deletions

View File

@@ -7,19 +7,23 @@ public sealed class Module
var modules = new Dictionary<string, Module>();
foreach (var syntaxTree in syntaxTrees)
{
if (!modules.TryGetValue(syntaxTree.ModuleName, out var module))
var moduleDeclaration = syntaxTree.TopLevelSyntaxNodes.OfType<ModuleSyntax>().FirstOrDefault();
if (moduleDeclaration != null)
{
module = new Module();
modules.Add(syntaxTree.ModuleName, module);
}
if (!modules.TryGetValue(moduleDeclaration.NameToken.Value, out var module))
{
module = new Module();
modules.Add(moduleDeclaration.NameToken.Value, module);
}
module._definitions.AddRange(syntaxTree.Definitions);
module._definitions.AddRange(syntaxTree.TopLevelSyntaxNodes);
}
}
return modules;
}
private readonly List<DefinitionSyntax> _definitions = [];
private readonly List<TopLevelSyntaxNode> _definitions = [];
public List<StructSyntax> Structs(bool includePrivate)
{

View File

@@ -19,9 +19,7 @@ public sealed class Parser
_tokens = tokens;
_tokenIndex = 0;
string? moduleName = null;
var imports = new List<string>();
var definitions = new List<DefinitionSyntax>();
var topLevelSyntaxNodes = new List<TopLevelSyntaxNode>();
while (HasToken)
{
@@ -29,53 +27,21 @@ public sealed class Parser
{
var startIndex = _tokenIndex;
if (TryExpectSymbol(Symbol.Import))
{
var name = ExpectStringLiteral();
if (imports.Contains(name.Value))
{
Diagnostics.Add(Diagnostic
.Warning($"Module {name.Value} is imported twice")
.At(name)
.WithHelp($"Remove duplicate import \"{name.Value}\"")
.Build());
}
else
{
imports.Add(name.Value);
}
continue;
}
if (TryExpectSymbol(Symbol.Module))
{
if (moduleName != null)
{
throw new ParseException(Diagnostic
.Error("Module is declared more than once")
.At(CurrentToken)
.WithHelp("Remove duplicate module declaration")
.Build());
}
moduleName = ExpectStringLiteral().Value;
continue;
}
var exported = TryExpectSymbol(Symbol.Export);
if (TryExpectSymbol(Symbol.Extern))
{
var externSymbol = ExpectStringLiteral();
ExpectSymbol(Symbol.Func);
definitions.Add(ParseFunc(startIndex, exported, externSymbol.Value));
topLevelSyntaxNodes.Add(ParseFunc(startIndex, exported, externSymbol));
continue;
}
var keyword = ExpectSymbol();
DefinitionSyntax definition = keyword.Symbol switch
TopLevelSyntaxNode definition = keyword.Symbol switch
{
Symbol.Module => ParseModule(startIndex),
Symbol.Import => ParseImport(startIndex),
Symbol.Func => ParseFunc(startIndex, exported, null),
Symbol.Struct => ParseStruct(startIndex, exported),
Symbol.Enum => ParseEnum(startIndex, exported),
@@ -86,7 +52,7 @@ public sealed class Parser
.Build())
};
definitions.Add(definition);
topLevelSyntaxNodes.Add(definition);
}
catch (ParseException e)
{
@@ -103,7 +69,19 @@ public sealed class Parser
}
}
return new SyntaxTree(definitions, moduleName ?? "default", imports);
return new SyntaxTree(topLevelSyntaxNodes);
}
private ImportSyntax ParseImport(int startIndex)
{
var name = ExpectIdentifier();
return new ImportSyntax(GetTokens(startIndex), name);
}
private ModuleSyntax ParseModule(int startIndex)
{
var name = ExpectIdentifier();
return new ModuleSyntax(GetTokens(startIndex), name);
}
private FuncParameterSyntax ParseFuncParameter()
@@ -113,10 +91,10 @@ public sealed class Parser
ExpectSymbol(Symbol.Colon);
var type = ParseType();
return new FuncParameterSyntax(GetTokens(startIndex), name.Value, type);
return new FuncParameterSyntax(GetTokens(startIndex), name, type);
}
private FuncSyntax ParseFunc(int startIndex, bool exported, string? externSymbol)
private FuncSyntax ParseFunc(int startIndex, bool exported, StringLiteralToken? externSymbol)
{
var name = ExpectIdentifier();
List<FuncParameterSyntax> parameters = [];
@@ -136,7 +114,7 @@ public sealed class Parser
var returnType = TryExpectSymbol(Symbol.Colon) ? ParseType() : new VoidTypeSyntax([]);
var prototype = new FuncPrototypeSyntax(GetTokens(startIndex), name.Value, exported, externSymbol, parameters, returnType);
var prototype = new FuncPrototypeSyntax(GetTokens(startIndex), name, exported, externSymbol, parameters, returnType);
BlockSyntax? body = null;
var bodyStartIndex = _tokenIndex;
@@ -160,7 +138,7 @@ public sealed class Parser
{
var memberStartIndex = _tokenIndex;
var fieldName = ExpectIdentifier().Value;
var fieldName = ExpectIdentifier();
ExpectSymbol(Symbol.Colon);
var fieldType = ParseType();
@@ -174,7 +152,7 @@ public sealed class Parser
fields.Add(new StructFieldSyntax(GetTokens(memberStartIndex), fieldName, fieldType, fieldValue));
}
return new StructSyntax(GetTokens(startIndex), name.Value, exported, fields);
return new StructSyntax(GetTokens(startIndex), name, exported, fields);
}
private EnumSyntax ParseEnum(int startIndex, bool exported)
@@ -192,13 +170,11 @@ public sealed class Parser
ExpectSymbol(Symbol.OpenBrace);
long value = -1;
while (!TryExpectSymbol(Symbol.CloseBrace))
{
var memberStartIndex = _tokenIndex;
var fieldName = ExpectIdentifier().Value;
long fieldValue;
var fieldName = ExpectIdentifier();
IntLiteralToken? value = null;
if (TryExpectSymbol(Symbol.Assign))
{
@@ -210,19 +186,13 @@ public sealed class Parser
.Build());
}
fieldValue = Convert.ToInt64(intLiteralToken.Value, intLiteralToken.Base);
value = fieldValue;
}
else
{
fieldValue = value + 1;
value = fieldValue;
value = intLiteralToken;
}
fields.Add(new EnumFieldSyntax(GetTokens(memberStartIndex), fieldName, fieldValue));
fields.Add(new EnumFieldSyntax(GetTokens(memberStartIndex), fieldName, value));
}
return new EnumSyntax(GetTokens(startIndex), name.Value, exported, type, fields);
return new EnumSyntax(GetTokens(startIndex), name, exported, type, fields);
}
private StatementSyntax ParseStatement()
@@ -267,7 +237,7 @@ public sealed class Parser
private VariableDeclarationSyntax ParseVariableDeclaration(int startIndex)
{
var name = ExpectIdentifier().Value;
var name = ExpectIdentifier();
TypeSyntax? explicitType = null;
if (TryExpectSymbol(Symbol.Colon))
@@ -334,12 +304,12 @@ public sealed class Parser
private ForSyntax ParseFor(int startIndex)
{
var itemName = ExpectIdentifier().Value;
string? indexName = null;
var itemName = ExpectIdentifier();
IdentifierToken? indexName = null;
if (TryExpectSymbol(Symbol.Comma))
{
indexName = ExpectIdentifier().Value;
indexName = ExpectIdentifier();
}
ExpectSymbol(Symbol.In);
@@ -467,10 +437,10 @@ public sealed class Parser
var token = ExpectToken();
var expr = token switch
{
BoolLiteralToken boolLiteral => new BoolLiteralSyntax(GetTokens(startIndex), boolLiteral.Value),
StringLiteralToken stringLiteral => new StringLiteralSyntax(GetTokens(startIndex), stringLiteral.Value),
FloatLiteralToken floatLiteral => new FloatLiteralSyntax(GetTokens(startIndex), floatLiteral.Value),
IntLiteralToken intLiteral => new IntLiteralSyntax(GetTokens(startIndex), intLiteral.Value, intLiteral.Base),
BoolLiteralToken boolLiteral => new BoolLiteralSyntax(GetTokens(startIndex), boolLiteral),
StringLiteralToken stringLiteral => new StringLiteralSyntax(GetTokens(startIndex), stringLiteral),
FloatLiteralToken floatLiteral => new FloatLiteralSyntax(GetTokens(startIndex), floatLiteral),
IntLiteralToken intLiteral => new IntLiteralSyntax(GetTokens(startIndex), intLiteral),
IdentifierToken identifier => ParseIdentifier(startIndex, identifier),
SymbolToken symbolToken => symbolToken.Symbol switch
{
@@ -528,10 +498,10 @@ public sealed class Parser
if (TryExpectSymbol(Symbol.DoubleColon))
{
var name = ExpectIdentifier();
return new ModuleIdentifierSyntax(GetTokens(startIndex), identifier.Value, name.Value);
return new ModuleIdentifierSyntax(GetTokens(startIndex), identifier, name);
}
return new LocalIdentifierSyntax(GetTokens(startIndex), identifier.Value);
return new LocalIdentifierSyntax(GetTokens(startIndex), identifier);
}
private ExpressionSyntax ParseParenthesizedExpression()
@@ -560,7 +530,7 @@ public sealed class Parser
if (TryExpectSymbol(Symbol.Period))
{
var member = ExpectIdentifier().Value;
var member = ExpectIdentifier();
expr = new MemberAccessSyntax(GetTokens(startIndex), expr, member);
continue;
}
@@ -627,12 +597,12 @@ public sealed class Parser
return new StructInitializerSyntax(GetTokens(startIndex), type, initializers);
}
private Dictionary<string, ExpressionSyntax> ParseStructInitializerBody()
private Dictionary<IdentifierToken, ExpressionSyntax> ParseStructInitializerBody()
{
Dictionary<string, ExpressionSyntax> initializers = [];
Dictionary<IdentifierToken, ExpressionSyntax> initializers = [];
while (!TryExpectSymbol(Symbol.CloseBrace))
{
var name = ExpectIdentifier().Value;
var name = ExpectIdentifier();
ExpectSymbol(Symbol.Assign);
var value = ParseExpression();
initializers.Add(name, value);
@@ -732,16 +702,16 @@ public sealed class Parser
return new BoolTypeSyntax(GetTokens(startIndex));
default:
{
string? module = null;
IdentifierToken? module = null;
if (TryExpectSymbol(Symbol.DoubleColon))
{
var customTypeName = ExpectIdentifier();
module = name.Value;
module = name;
name = customTypeName;
}
return new CustomTypeSyntax(GetTokens(startIndex), module, name.Value);
return new CustomTypeSyntax(GetTokens(startIndex), module, name);
}
}
}
@@ -780,7 +750,7 @@ public sealed class Parser
{
ExpectSymbol(Symbol.CloseBracket);
var baseType = ParseType();
return new ConstArrayTypeSyntax(GetTokens(startIndex), baseType, Convert.ToInt64(intLiteral.Value, intLiteral.Base));
return new ConstArrayTypeSyntax(GetTokens(startIndex), baseType, intLiteral.AsU64);
}
else if (TryExpectSymbol(Symbol.QuestionMark))
{
@@ -938,7 +908,7 @@ public sealed class Parser
}
}
public record SyntaxTree(List<DefinitionSyntax> Definitions, string ModuleName, List<string> Imports);
public record SyntaxTree(List<TopLevelSyntaxNode> TopLevelSyntaxNodes);
public class ParseException : Exception
{

View File

@@ -4,21 +4,27 @@ public abstract record SyntaxNode(List<Token> Tokens);
#region Definitions
public abstract record DefinitionSyntax(List<Token> Tokens, string Name, bool Exported) : SyntaxNode(Tokens);
public record TopLevelSyntaxNode(List<Token> Tokens) : SyntaxNode(Tokens);
public record FuncParameterSyntax(List<Token> Tokens, string Name, TypeSyntax Type) : SyntaxNode(Tokens);
public record ModuleSyntax(List<Token> Tokens, IdentifierToken NameToken) : TopLevelSyntaxNode(Tokens);
public record FuncPrototypeSyntax(List<Token> Tokens, string Name, bool Exported, string? ExternSymbol, List<FuncParameterSyntax> Parameters, TypeSyntax ReturnType) : SyntaxNode(Tokens);
public record ImportSyntax(List<Token> Tokens, IdentifierToken NameToken) : TopLevelSyntaxNode(Tokens);
public record FuncSyntax(List<Token> Tokens, FuncPrototypeSyntax Prototype, BlockSyntax? Body) : DefinitionSyntax(Tokens, Prototype.Name, Prototype.Exported);
public abstract record DefinitionSyntax(List<Token> Tokens, IdentifierToken NameToken, bool Exported) : TopLevelSyntaxNode(Tokens);
public record StructFieldSyntax(List<Token> Tokens, string Name, TypeSyntax Type, ExpressionSyntax? Value) : SyntaxNode(Tokens);
public record FuncParameterSyntax(List<Token> Tokens, IdentifierToken NameToken, TypeSyntax Type) : SyntaxNode(Tokens);
public record StructSyntax(List<Token> Tokens, string Name, bool Exported, List<StructFieldSyntax> Fields) : DefinitionSyntax(Tokens, Name, Exported);
public record FuncPrototypeSyntax(List<Token> Tokens, IdentifierToken NameToken, bool Exported, StringLiteralToken? ExternSymbolToken, List<FuncParameterSyntax> Parameters, TypeSyntax ReturnType) : SyntaxNode(Tokens);
public record EnumFieldSyntax(List<Token> Tokens, string Name, long Value) : SyntaxNode(Tokens);
public record FuncSyntax(List<Token> Tokens, FuncPrototypeSyntax Prototype, BlockSyntax? Body) : DefinitionSyntax(Tokens, Prototype.NameToken, Prototype.Exported);
public record EnumSyntax(List<Token> Tokens, string Name, bool Exported, TypeSyntax? Type, List<EnumFieldSyntax> Fields) : DefinitionSyntax(Tokens, Name, Exported);
public record StructFieldSyntax(List<Token> Tokens, IdentifierToken NameToken, TypeSyntax Type, ExpressionSyntax? Value) : SyntaxNode(Tokens);
public record StructSyntax(List<Token> Tokens, IdentifierToken NameToken, bool Exported, List<StructFieldSyntax> Fields) : DefinitionSyntax(Tokens, NameToken, Exported);
public record EnumFieldSyntax(List<Token> Tokens, IdentifierToken NameToken, IntLiteralToken? ValueToken) : SyntaxNode(Tokens);
public record EnumSyntax(List<Token> Tokens, IdentifierToken NameToken, bool Exported, TypeSyntax? Type, List<EnumFieldSyntax> Fields) : DefinitionSyntax(Tokens, NameToken, Exported);
public enum UnaryOperatorSyntax
{
@@ -64,7 +70,7 @@ public record AssignmentSyntax(List<Token> Tokens, ExpressionSyntax Target, Expr
public record IfSyntax(List<Token> Tokens, ExpressionSyntax Condition, BlockSyntax Body, Variant<IfSyntax, BlockSyntax>? Else) : StatementSyntax(Tokens);
public record VariableDeclarationSyntax(List<Token> Tokens, string Name, TypeSyntax? ExplicitType, ExpressionSyntax? Assignment) : StatementSyntax(Tokens);
public record VariableDeclarationSyntax(List<Token> Tokens, IdentifierToken NameToken, TypeSyntax? ExplicitType, ExpressionSyntax? Assignment) : StatementSyntax(Tokens);
public record ContinueSyntax(List<Token> Tokens) : StatementSyntax(Tokens);
@@ -74,7 +80,7 @@ public record DeferSyntax(List<Token> Tokens, StatementSyntax Statement) : State
public record WhileSyntax(List<Token> Tokens, ExpressionSyntax Condition, BlockSyntax Body) : StatementSyntax(Tokens);
public record ForSyntax(List<Token> Tokens, string ElementName, string? IndexName, ExpressionSyntax Target, BlockSyntax Body) : StatementSyntax(Tokens);
public record ForSyntax(List<Token> Tokens, IdentifierToken ElementNameToken, IdentifierToken? IndexNameToken, ExpressionSyntax Target, BlockSyntax Body) : StatementSyntax(Tokens);
#endregion
@@ -88,9 +94,9 @@ public record UnaryExpressionSyntax(List<Token> Tokens, UnaryOperatorSyntax Oper
public record FuncCallSyntax(List<Token> Tokens, ExpressionSyntax Expression, List<ExpressionSyntax> Parameters) : ExpressionSyntax(Tokens);
public record LocalIdentifierSyntax(List<Token> Tokens, string Name) : ExpressionSyntax(Tokens);
public record LocalIdentifierSyntax(List<Token> Tokens, IdentifierToken NameToken) : ExpressionSyntax(Tokens);
public record ModuleIdentifierSyntax(List<Token> Tokens, string Module, string Name) : ExpressionSyntax(Tokens);
public record ModuleIdentifierSyntax(List<Token> Tokens, IdentifierToken ModuleToken, IdentifierToken NameToken) : ExpressionSyntax(Tokens);
public record ArrayInitializerSyntax(List<Token> Tokens, List<ExpressionSyntax> Values) : ExpressionSyntax(Tokens);
@@ -98,17 +104,17 @@ public record ArrayIndexAccessSyntax(List<Token> Tokens, ExpressionSyntax Target
public record AddressOfSyntax(List<Token> Tokens, ExpressionSyntax Target) : ExpressionSyntax(Tokens);
public record IntLiteralSyntax(List<Token> Tokens, string Value, int Base) : ExpressionSyntax(Tokens);
public record IntLiteralSyntax(List<Token> Tokens, IntLiteralToken Token) : ExpressionSyntax(Tokens);
public record StringLiteralSyntax(List<Token> Tokens, string Value) : ExpressionSyntax(Tokens);
public record StringLiteralSyntax(List<Token> Tokens, StringLiteralToken Token) : ExpressionSyntax(Tokens);
public record BoolLiteralSyntax(List<Token> Tokens, bool Value) : ExpressionSyntax(Tokens);
public record BoolLiteralSyntax(List<Token> Tokens, BoolLiteralToken Token) : ExpressionSyntax(Tokens);
public record FloatLiteralSyntax(List<Token> Tokens, string Value) : ExpressionSyntax(Tokens);
public record FloatLiteralSyntax(List<Token> Tokens, FloatLiteralToken Token) : ExpressionSyntax(Tokens);
public record MemberAccessSyntax(List<Token> Tokens, ExpressionSyntax Target, string Member) : ExpressionSyntax(Tokens);
public record MemberAccessSyntax(List<Token> Tokens, ExpressionSyntax Target, IdentifierToken MemberToken) : ExpressionSyntax(Tokens);
public record StructInitializerSyntax(List<Token> Tokens, TypeSyntax? StructType, Dictionary<string, ExpressionSyntax> Initializers) : ExpressionSyntax(Tokens);
public record StructInitializerSyntax(List<Token> Tokens, TypeSyntax? StructType, Dictionary<IdentifierToken, ExpressionSyntax> Initializers) : ExpressionSyntax(Tokens);
public record DereferenceSyntax(List<Token> Tokens, ExpressionSyntax Target) : ExpressionSyntax(Tokens);
@@ -140,8 +146,8 @@ public record SliceTypeSyntax(List<Token> Tokens, TypeSyntax BaseType) : TypeSyn
public record ArrayTypeSyntax(List<Token> Tokens, TypeSyntax BaseType) : TypeSyntax(Tokens);
public record ConstArrayTypeSyntax(List<Token> Tokens, TypeSyntax BaseType, long Size) : TypeSyntax(Tokens);
public record ConstArrayTypeSyntax(List<Token> Tokens, TypeSyntax BaseType, ulong Size) : TypeSyntax(Tokens);
public record CustomTypeSyntax(List<Token> Tokens, string? Module, string Name) : TypeSyntax(Tokens);
public record CustomTypeSyntax(List<Token> Tokens, IdentifierToken? ModuleToken, IdentifierToken NameToken) : TypeSyntax(Tokens);
#endregion

View File

@@ -2,8 +2,68 @@
namespace NubLang.Syntax;
public abstract record Token(SourceSpan Span);
public record IdentifierToken(SourceSpan Span, string Value) : Token(Span)
{
public override string ToString()
{
return Value;
}
}
public record IntLiteralToken(SourceSpan Span, string Value, int Base) : Token(Span)
{
public ulong AsU64 => Convert.ToUInt64(Value, Base);
public long AsI64 => Convert.ToInt64(Value, Base);
public uint AsU32 => Convert.ToUInt32(Value, Base);
public int AsI32 => Convert.ToInt32(Value, Base);
public ushort AsU16 => Convert.ToUInt16(Value, Base);
public short AsI16 => Convert.ToInt16(Value, Base);
public byte AsU8 => Convert.ToByte(Value, Base);
public sbyte AsI8 => Convert.ToSByte(Value, Base);
public float AsF32 => Convert.ToSingle(AsI32);
public double AsF64 => Convert.ToDouble(AsI64);
public override string ToString()
{
return Value;
}
}
public record StringLiteralToken(SourceSpan Span, string Value) : Token(Span)
{
public override string ToString()
{
return $"\"{Value}\"";
}
}
public record BoolLiteralToken(SourceSpan Span, bool Value) : Token(Span)
{
public override string ToString()
{
return Value ? "true" : "false";
}
}
public record FloatLiteralToken(SourceSpan Span, string Value) : Token(Span)
{
public float AsF32 => Convert.ToSingle(Value);
public double AsF64 => Convert.ToDouble(Value);
public override string ToString()
{
return Value;
}
}
public enum Symbol
{
// Default
None,
// Control
If,
Else,
@@ -62,16 +122,63 @@ public enum Symbol
QuestionMark,
}
public abstract record Token(SourceSpan Span);
public record IdentifierToken(SourceSpan Span, string Value) : Token(Span);
public record IntLiteralToken(SourceSpan Span, string Value, int Base) : Token(Span);
public record StringLiteralToken(SourceSpan Span, string Value) : Token(Span);
public record BoolLiteralToken(SourceSpan Span, bool Value) : Token(Span);
public record FloatLiteralToken(SourceSpan Span, string Value) : Token(Span);
public record SymbolToken(SourceSpan Span, Symbol Symbol) : Token(Span);
public record SymbolToken(SourceSpan Span, Symbol Symbol) : Token(Span)
{
public override string ToString()
{
return Symbol switch
{
Symbol.Func => "func",
Symbol.If => "if",
Symbol.Else => "else",
Symbol.While => "while",
Symbol.For => "for",
Symbol.In => "in",
Symbol.Break => "break",
Symbol.Continue => "continue",
Symbol.Return => "return",
Symbol.Struct => "struct",
Symbol.Let => "let",
Symbol.Extern => "extern",
Symbol.Module => "module",
Symbol.Export => "export",
Symbol.Import => "import",
Symbol.Defer => "defer",
Symbol.Enum => "enum",
Symbol.Equal => "==",
Symbol.NotEqual => "!=",
Symbol.LessThanOrEqual => "<=",
Symbol.GreaterThanOrEqual => ">=",
Symbol.LeftShift => "<<",
Symbol.RightShift => ">>",
Symbol.And => "&&",
Symbol.Or => "||",
Symbol.DoubleColon => "::",
Symbol.Colon => ":",
Symbol.OpenParen => "(",
Symbol.CloseParen => ")",
Symbol.OpenBrace => "{",
Symbol.CloseBrace => "}",
Symbol.OpenBracket => "[",
Symbol.CloseBracket => "]",
Symbol.Comma => ",",
Symbol.Period => ".",
Symbol.Assign => "=",
Symbol.LessThan => "<",
Symbol.GreaterThan => ">",
Symbol.Plus => "+",
Symbol.Minus => "-",
Symbol.Star => "*",
Symbol.ForwardSlash => "/",
Symbol.Bang => "!",
Symbol.Caret => "^",
Symbol.Ampersand => "&",
Symbol.Semi => ";",
Symbol.Percent => "%",
Symbol.Pipe => "|",
Symbol.At => "@",
Symbol.QuestionMark => "?",
_ => "none",
};
}
}

View File

@@ -4,72 +4,9 @@ namespace NubLang.Syntax;
public sealed class Tokenizer
{
private static readonly Dictionary<string, Symbol> Keywords = new()
{
["func"] = Symbol.Func,
["if"] = Symbol.If,
["else"] = Symbol.Else,
["while"] = Symbol.While,
["for"] = Symbol.For,
["in"] = Symbol.In,
["break"] = Symbol.Break,
["continue"] = Symbol.Continue,
["return"] = Symbol.Return,
["struct"] = Symbol.Struct,
["let"] = Symbol.Let,
["extern"] = Symbol.Extern,
["module"] = Symbol.Module,
["export"] = Symbol.Export,
["import"] = Symbol.Import,
["defer"] = Symbol.Defer,
["enum"] = Symbol.Enum,
};
private static readonly Dictionary<char[], Symbol> Symbols = new()
{
[['=', '=']] = Symbol.Equal,
[['!', '=']] = Symbol.NotEqual,
[['<', '=']] = Symbol.LessThanOrEqual,
[['>', '=']] = Symbol.GreaterThanOrEqual,
[['<', '<']] = Symbol.LeftShift,
[['>', '>']] = Symbol.RightShift,
[['&', '&']] = Symbol.And,
[['|', '|']] = Symbol.Or,
[[':', ':']] = Symbol.DoubleColon,
[[':']] = Symbol.Colon,
[['(']] = Symbol.OpenParen,
[[')']] = Symbol.CloseParen,
[['{']] = Symbol.OpenBrace,
[['}']] = Symbol.CloseBrace,
[['[']] = Symbol.OpenBracket,
[[']']] = Symbol.CloseBracket,
[[',']] = Symbol.Comma,
[['.']] = Symbol.Period,
[['=']] = Symbol.Assign,
[['<']] = Symbol.LessThan,
[['>']] = Symbol.GreaterThan,
[['+']] = Symbol.Plus,
[['-']] = Symbol.Minus,
[['*']] = Symbol.Star,
[['/']] = Symbol.ForwardSlash,
[['!']] = Symbol.Bang,
[['^']] = Symbol.Caret,
[['&']] = Symbol.Ampersand,
[[';']] = Symbol.Semi,
[['%']] = Symbol.Percent,
[['|']] = Symbol.Pipe,
[['@']] = Symbol.At,
[['?']] = Symbol.QuestionMark,
};
private static readonly (char[] Pattern, Symbol Symbol)[] OrderedSymbols = Symbols
.OrderByDescending(kvp => kvp.Key.Length)
.Select(kvp => (kvp.Key, kvp.Value))
.ToArray();
private readonly string _fileName;
private readonly string _content;
private int _index = 0;
private int _index;
private int _line = 1;
private int _column = 1;
@@ -79,8 +16,8 @@ public sealed class Tokenizer
_content = content;
}
public List<Diagnostic> Diagnostics { get; } = [];
public List<Token> Tokens { get; } = [];
public List<Diagnostic> Diagnostics { get; } = new(16);
public List<Token> Tokens { get; } = new(256);
public void Tokenize()
{
@@ -90,17 +27,17 @@ public sealed class Tokenizer
_line = 1;
_column = 1;
while (Peek().HasValue)
while (_index < _content.Length)
{
try
{
var current = Peek()!.Value;
var current = _content[_index];
if (char.IsWhiteSpace(current))
{
if (current is '\n')
if (current == '\n')
{
_line += 1;
// note(nub31): Next increments the column, so 0 is correct here
_column = 0;
}
@@ -108,10 +45,10 @@ public sealed class Tokenizer
continue;
}
if (current == '/' && Peek(1) == '/')
if (current == '/' && _index + 1 < _content.Length && _content[_index + 1] == '/')
{
// note(nub31): Keep newline so next iteration increments the line counter
while (Peek() is not '\n')
Next(2);
while (_index < _content.Length && _content[_index] != '\n')
{
Next();
}
@@ -131,192 +68,312 @@ public sealed class Tokenizer
private Token ParseToken(char current, int lineStart, int columnStart)
{
if (char.IsLetter(current) || current == '_')
{
var buffer = string.Empty;
while (Peek() != null && (char.IsLetterOrDigit(Peek()!.Value) || Peek() == '_'))
{
buffer += Peek();
Next();
}
if (Keywords.TryGetValue(buffer, out var keywordSymbol))
{
return new SymbolToken(CreateSpan(lineStart, columnStart), keywordSymbol);
}
if (buffer is "true" or "false")
{
return new BoolLiteralToken(CreateSpan(lineStart, columnStart), Convert.ToBoolean(buffer));
}
return new IdentifierToken(CreateSpan(lineStart, columnStart), buffer);
}
// Numbers
if (char.IsDigit(current))
{
var buffer = string.Empty;
if (current == '0' && Peek(1) is 'x')
{
buffer += "0x";
Next();
Next();
while (Peek() != null && Uri.IsHexDigit(Peek()!.Value))
{
buffer += Peek()!.Value;
Next();
}
if (buffer.Length <= 2)
{
throw new TokenizerException(Diagnostic
.Error("Invalid hex literal, no digits found")
.At(_fileName, _line, _column)
.Build());
}
return new IntLiteralToken(CreateSpan(lineStart, columnStart), buffer, 16);
}
if (current == '0' && Peek(1) is 'b')
{
buffer += "0b";
Next();
Next();
while (Peek() != null && (Peek() == '0' || Peek() == '1'))
{
buffer += Peek()!.Value;
Next();
}
if (buffer.Length <= 2)
{
throw new TokenizerException(Diagnostic
.Error("Invalid binary literal, no digits found")
.At(_fileName, _line, _column)
.Build());
}
return new IntLiteralToken(CreateSpan(lineStart, columnStart), buffer, 2);
}
var isFloat = false;
while (Peek() != null)
{
var next = Peek()!.Value;
if (next == '.')
{
if (isFloat)
{
throw new TokenizerException(Diagnostic
.Error("More than one period found in float literal")
.At(_fileName, _line, _column)
.Build());
}
isFloat = true;
buffer += next;
Next();
}
else if (char.IsDigit(next))
{
buffer += next;
Next();
}
else
{
break;
}
}
if (isFloat)
{
return new FloatLiteralToken(CreateSpan(lineStart, columnStart), buffer);
}
else
{
return new IntLiteralToken(CreateSpan(lineStart, columnStart), buffer, 10);
}
return ParseNumber(lineStart, columnStart);
}
// String literals
if (current == '"')
{
Next();
var buffer = string.Empty;
while (true)
{
var next = Peek();
if (!next.HasValue)
{
throw new TokenizerException(Diagnostic
.Error("Unclosed string literal")
.At(_fileName, _line, _column)
.Build());
}
if (next is '\n')
{
_line += 1;
break;
}
if (next is '"')
{
Next();
break;
}
buffer += next;
Next();
}
return new StringLiteralToken(CreateSpan(lineStart, columnStart), buffer);
return ParseString(lineStart, columnStart);
}
foreach (var (pattern, symbol) in OrderedSymbols)
// Try keywords and symbols by length (longest first)
for (var i = 8; i >= 1; i--)
{
for (var i = 0; i < pattern.Length; i++)
if (TryMatchSymbol(i, lineStart, columnStart, out var token))
{
var c = Peek(i);
if (!c.HasValue || c.Value != pattern[i]) break;
if (i == pattern.Length - 1)
{
for (var j = 0; j <= i; j++)
{
Next();
}
return new SymbolToken(CreateSpan(lineStart, columnStart), symbol);
}
return token;
}
}
// Identifiers
if (char.IsLetter(current) || current == '_')
{
return ParseIdentifier(lineStart, columnStart);
}
throw new TokenizerException(Diagnostic.Error($"Unknown token '{current}'").Build());
}
private Token ParseNumber(int lineStart, int columnStart)
{
var start = _index;
var current = _content[_index];
// Hex literal
if (current == '0' && _index + 1 < _content.Length && _content[_index + 1] == 'x')
{
Next(2);
var digitStart = _index;
while (_index < _content.Length && Uri.IsHexDigit(_content[_index]))
{
Next();
}
if (_index == digitStart)
{
throw new TokenizerException(Diagnostic
.Error("Invalid hex literal, no digits found")
.At(_fileName, _line, _column)
.Build());
}
return new IntLiteralToken(
CreateSpan(lineStart, columnStart),
_content.Substring(start, _index - start),
16);
}
// Binary literal
if (current == '0' && _index + 1 < _content.Length && _content[_index + 1] == 'b')
{
Next(2);
var digitStart = _index;
while (_index < _content.Length && (_content[_index] == '0' || _content[_index] == '1'))
{
Next();
}
if (_index == digitStart)
{
throw new TokenizerException(Diagnostic
.Error("Invalid binary literal, no digits found")
.At(_fileName, _line, _column)
.Build());
}
return new IntLiteralToken(
CreateSpan(lineStart, columnStart),
_content.Substring(start, _index - start),
2);
}
// Decimal or float
var isFloat = false;
while (_index < _content.Length)
{
var next = _content[_index];
if (next == '.')
{
if (isFloat)
{
throw new TokenizerException(Diagnostic
.Error("More than one period found in float literal")
.At(_fileName, _line, _column)
.Build());
}
isFloat = true;
Next();
}
else if (char.IsDigit(next))
{
Next();
}
else
{
break;
}
}
var buffer = _content.Substring(start, _index - start);
return isFloat
? new FloatLiteralToken(CreateSpan(lineStart, columnStart), buffer)
: new IntLiteralToken(CreateSpan(lineStart, columnStart), buffer, 10);
}
private StringLiteralToken ParseString(int lineStart, int columnStart)
{
Next(); // Skip opening quote
var start = _index;
while (true)
{
if (_index >= _content.Length)
{
throw new TokenizerException(Diagnostic
.Error("Unclosed string literal")
.At(_fileName, _line, _column)
.Build());
}
var next = _content[_index];
if (next == '\n')
{
throw new TokenizerException(Diagnostic
.Error("Unclosed string literal (newline found)")
.At(_fileName, _line, _column)
.Build());
}
if (next == '"')
{
var buffer = _content.Substring(start, _index - start);
Next();
return new StringLiteralToken(CreateSpan(lineStart, columnStart), buffer);
}
Next();
}
}
private bool TryMatchSymbol(int length, int lineStart, int columnStart, out Token token)
{
token = null!;
if (_index + length > _content.Length)
{
return false;
}
var span = _content.AsSpan(_index, length);
var symbol = length switch
{
8 => span switch
{
"continue" => Symbol.Continue,
_ => Symbol.None
},
6 => span switch
{
"return" => Symbol.Return,
"struct" => Symbol.Struct,
"extern" => Symbol.Extern,
"module" => Symbol.Module,
"export" => Symbol.Export,
"import" => Symbol.Import,
_ => Symbol.None
},
5 => span switch
{
"break" => Symbol.Break,
"while" => Symbol.While,
"defer" => Symbol.Defer,
_ => Symbol.None
},
4 => span switch
{
"func" => Symbol.Func,
"else" => Symbol.Else,
"enum" => Symbol.Enum,
_ => Symbol.None
},
3 => span switch
{
"for" => Symbol.For,
"let" => Symbol.Let,
_ => Symbol.None
},
2 => span switch
{
"if" => Symbol.If,
"in" => Symbol.In,
"==" => Symbol.Equal,
"!=" => Symbol.NotEqual,
"<=" => Symbol.LessThanOrEqual,
">=" => Symbol.GreaterThanOrEqual,
"<<" => Symbol.LeftShift,
">>" => Symbol.RightShift,
"&&" => Symbol.And,
"||" => Symbol.Or,
"::" => Symbol.DoubleColon,
_ => Symbol.None
},
1 => span[0] switch
{
':' => Symbol.Colon,
'(' => Symbol.OpenParen,
')' => Symbol.CloseParen,
'{' => Symbol.OpenBrace,
'}' => Symbol.CloseBrace,
'[' => Symbol.OpenBracket,
']' => Symbol.CloseBracket,
',' => Symbol.Comma,
'.' => Symbol.Period,
'=' => Symbol.Assign,
'<' => Symbol.LessThan,
'>' => Symbol.GreaterThan,
'+' => Symbol.Plus,
'-' => Symbol.Minus,
'*' => Symbol.Star,
'/' => Symbol.ForwardSlash,
'!' => Symbol.Bang,
'^' => Symbol.Caret,
'&' => Symbol.Ampersand,
';' => Symbol.Semi,
'%' => Symbol.Percent,
'|' => Symbol.Pipe,
'@' => Symbol.At,
'?' => Symbol.QuestionMark,
_ => Symbol.None
},
_ => Symbol.None
};
if (symbol != Symbol.None)
{
var isAlphaKeyword = char.IsLetter(span[0]);
if (isAlphaKeyword)
{
var nextIdx = _index + length;
if (nextIdx < _content.Length)
{
var nextChar = _content[nextIdx];
if (char.IsLetterOrDigit(nextChar) || nextChar == '_')
{
return false;
}
}
}
Next(length);
token = new SymbolToken(CreateSpan(lineStart, columnStart), symbol);
return true;
}
return false;
}
private IdentifierToken ParseIdentifier(int lineStart, int columnStart)
{
var start = _index;
while (_index < _content.Length)
{
var ch = _content[_index];
if (char.IsLetterOrDigit(ch) || ch == '_')
{
Next();
}
else
{
break;
}
}
return new IdentifierToken(
CreateSpan(lineStart, columnStart),
_content.Substring(start, _index - start));
}
private SourceSpan CreateSpan(int lineStart, int columnStart)
{
return new SourceSpan(_fileName, new SourceLocation(lineStart, columnStart), new SourceLocation(_line, _column));
}
private char? Peek(int offset = 0)
private void Next(int count = 1)
{
if (_index + offset < _content.Length)
{
return _content[_index + offset];
}
return null;
}
private void Next()
{
_index += 1;
_column += 1;
_index += count;
_column += count;
}
}