// Listing metadata from the source viewer: 271 lines, 7.1 KiB, C#
namespace Nub.Lang.Frontend.Lexing;

/// <summary>
/// Turns source text into a flat list of <see cref="Token"/>s: keywords, modifiers,
/// identifiers, literals, operator symbols and documentation comments.
/// </summary>
/// <remarks>
/// The lexing state (source text, source file and cursor) lives in instance fields and is
/// reset at the start of every <see cref="Lex"/> call.
/// </remarks>
public class Lexer
{
    /// <summary>Reserved words that lex to a <see cref="SymbolToken"/> instead of an identifier.</summary>
    private static readonly Dictionary<string, Symbol> Keywords = new()
    {
        ["func"] = Symbol.Func,
        ["import"] = Symbol.Import,
        ["if"] = Symbol.If,
        ["else"] = Symbol.Else,
        ["while"] = Symbol.While,
        ["break"] = Symbol.Break,
        ["continue"] = Symbol.Continue,
        ["return"] = Symbol.Return,
        ["new"] = Symbol.New,
        ["struct"] = Symbol.Struct,
    };

    /// <summary>Words that lex to a <see cref="ModifierToken"/>.</summary>
    private static readonly Dictionary<string, Modifier> Modifiers = new()
    {
        ["global"] = Modifier.Global,
        ["extern"] = Modifier.Extern,
    };

    /// <summary>
    /// Multi-character operators. They are tried before <see cref="Chars"/> so that e.g.
    /// <c>==</c> is not lexed as two <c>=</c> tokens. Kept as an ordered array because the
    /// table is only ever scanned, never looked up by key.
    /// </summary>
    private static readonly (string Text, Symbol Kind)[] Chains =
    [
        ("==", Symbol.Equal),
        ("!=", Symbol.NotEqual),
        ("<=", Symbol.LessThanOrEqual),
        (">=", Symbol.GreaterThanOrEqual),
    ];

    /// <summary>Single-character symbols.</summary>
    private static readonly Dictionary<char, Symbol> Chars = new()
    {
        [';'] = Symbol.Semicolon,
        [':'] = Symbol.Colon,
        ['('] = Symbol.OpenParen,
        [')'] = Symbol.CloseParen,
        ['{'] = Symbol.OpenBrace,
        ['}'] = Symbol.CloseBrace,
        ['['] = Symbol.OpenBracket,
        [']'] = Symbol.CloseBracket,
        [','] = Symbol.Comma,
        ['.'] = Symbol.Period,
        ['='] = Symbol.Assign,
        ['<'] = Symbol.LessThan,
        ['>'] = Symbol.GreaterThan,
        ['+'] = Symbol.Plus,
        ['-'] = Symbol.Minus,
        ['*'] = Symbol.Star,
        ['/'] = Symbol.ForwardSlash,
        ['!'] = Symbol.Bang,
        ['^'] = Symbol.Caret,
        ['&'] = Symbol.Ampersand,
    };

    private string _src = null!;
    private SourceFile _sourceFile;
    private int _index;

    /// <summary>
    /// Lexes <paramref name="src"/> from the beginning and returns every token found, in order.
    /// </summary>
    /// <param name="src">The source text to tokenize.</param>
    /// <param name="sourceFile">Passed through to every token that is created.</param>
    /// <returns>The tokens in the order they appear in <paramref name="src"/>.</returns>
    /// <exception cref="Exception">
    /// Thrown for an unknown character, an unclosed string literal, or a numeric literal
    /// containing more than one period.
    /// </exception>
    public List<Token> Lex(string src, SourceFile sourceFile)
    {
        _src = src;
        _sourceFile = sourceFile;
        _index = 0;

        List<Token> tokens = [];
        while (ParseToken().TryGetValue(out var token))
        {
            tokens.Add(token);
        }

        return tokens;
    }

    /// <summary>Advances the cursor past any run of whitespace characters.</summary>
    private void ConsumeWhitespace()
    {
        while (Peek().TryGetValue(out var character) && char.IsWhiteSpace(character))
        {
            Next();
        }
    }

    /// <summary>
    /// Advances the cursor past the rest of the current line, including its terminating
    /// <c>'\n'</c>, or to the end of the input when there is no further newline.
    /// </summary>
    private void ConsumeLine()
    {
        var newline = _src.IndexOf('\n', _index);
        _index = newline < 0 ? _src.Length : newline + 1;
    }

    /// <summary>
    /// Reads the next token at the cursor, skipping whitespace and <c>//</c> comments.
    /// </summary>
    /// <returns>The next token, or an empty optional once the end of the input is reached.</returns>
    private Optional<Token> ParseToken()
    {
        ConsumeWhitespace();
        var startIndex = _index;

        string? documentation = null;
        while (Peek() is { Value: '/' } && Peek(1) is { Value: '/' })
        {
            Next();
            Next();

            // A third slash marks a documentation comment whose text is kept.
            var isDocumentation = Peek() is { Value: '/' };
            if (isDocumentation)
            {
                Next();
            }

            var lineStart = _index;
            ConsumeLine();

            if (isDocumentation)
            {
                // Only record non-empty text so a bare "///" at end of input yields no token.
                if (_index > lineStart)
                {
                    documentation += _src[lineStart.._index];
                }
            }
            else if (documentation is null)
            {
                // Skip blank lines and indentation after a regular comment so that a
                // following comment is recognised as a comment rather than falling through
                // and being lexed as two ForwardSlash symbols.
                ConsumeWhitespace();
                startIndex = _index;
            }
        }

        if (documentation != null)
        {
            return new DocumentationToken(_sourceFile, startIndex, _index, documentation);
        }

        // Whitespace has already been consumed here, either by the initial call or after
        // the last regular comment, and startIndex points at the cursor.
        if (!Peek().TryGetValue(out var current))
        {
            return Optional<Token>.Empty();
        }

        if (char.IsLetter(current) || current == '_')
        {
            return LexWord(startIndex);
        }

        if (char.IsDigit(current))
        {
            return LexNumber(startIndex);
        }

        // Multi-character operators take precedence over their single-character prefixes.
        if (TryLexChain(out var chainSymbol))
        {
            return new SymbolToken(_sourceFile, startIndex, _index, chainSymbol);
        }

        if (Chars.TryGetValue(current, out var charSymbol))
        {
            Next();
            return new SymbolToken(_sourceFile, startIndex, _index, charSymbol);
        }

        if (current == '"')
        {
            return LexString(startIndex);
        }

        throw new Exception($"Unknown character {current}");
    }

    /// <summary>
    /// Lexes a run of letters, digits and underscores starting at <paramref name="startIndex"/>
    /// and classifies it as a keyword, modifier, boolean literal or identifier, in that order.
    /// </summary>
    private Token LexWord(int startIndex)
    {
        while (Peek().TryGetValue(out var next) && (char.IsLetterOrDigit(next) || next == '_'))
        {
            Next();
        }

        var text = _src[startIndex.._index];

        if (Keywords.TryGetValue(text, out var keywordSymbol))
        {
            return new SymbolToken(_sourceFile, startIndex, _index, keywordSymbol);
        }

        if (Modifiers.TryGetValue(text, out var modifier))
        {
            return new ModifierToken(_sourceFile, startIndex, _index, modifier);
        }

        if (text is "true" or "false")
        {
            return new LiteralToken(_sourceFile, startIndex, _index, NubPrimitiveType.Bool, text);
        }

        return new IdentifierToken(_sourceFile, startIndex, _index, text);
    }

    /// <summary>
    /// Lexes a numeric literal starting at <paramref name="startIndex"/>. A period or a
    /// trailing <c>f</c> makes it an F64 literal, otherwise it is I64. The <c>f</c> suffix is
    /// consumed but not included in the literal text.
    /// </summary>
    /// <exception cref="Exception">Thrown when the literal contains a second period.</exception>
    private Token LexNumber(int startIndex)
    {
        var isFloat = false;

        while (Peek().TryGetValue(out var next))
        {
            if (next == '.')
            {
                if (isFloat)
                {
                    throw new Exception("More than one period found in float literal");
                }

                isFloat = true;
            }
            else if (!char.IsDigit(next))
            {
                break;
            }

            Next();
        }

        // Capture the text before the optional suffix so the suffix is excluded from it.
        var text = _src[startIndex.._index];

        if (Peek().TryGetValue(out var suffix) && suffix == 'f')
        {
            isFloat = true;
            Next();
        }

        return new LiteralToken(_sourceFile, startIndex, _index, isFloat ? NubPrimitiveType.F64 : NubPrimitiveType.I64, text);
    }

    /// <summary>
    /// Lexes a double-quoted string literal whose opening quote is at the cursor. The literal
    /// text is everything between the quotes, taken verbatim (no escape processing).
    /// </summary>
    /// <exception cref="Exception">Thrown when no closing quote is found.</exception>
    private Token LexString(int startIndex)
    {
        Next(); // opening quote

        var contentStart = _index;
        var closingQuote = _src.IndexOf('"', contentStart);
        if (closingQuote < 0)
        {
            throw new Exception("Unclosed string literal");
        }

        _index = closingQuote + 1; // step past the closing quote

        return new LiteralToken(_sourceFile, startIndex, _index, NubPrimitiveType.String, _src[contentStart..closingQuote]);
    }

    /// <summary>
    /// Tries to match one of the multi-character operators in <see cref="Chains"/> at the
    /// cursor. On success the cursor is advanced past the operator.
    /// </summary>
    /// <param name="symbol">The matched symbol; only meaningful when the method returns true.</param>
    /// <returns>True if an operator was matched and consumed.</returns>
    private bool TryLexChain(out Symbol symbol)
    {
        foreach (var (text, kind) in Chains)
        {
            if (string.CompareOrdinal(_src, _index, text, 0, text.Length) == 0)
            {
                _index += text.Length;
                symbol = kind;
                return true;
            }
        }

        symbol = default;
        return false;
    }

    /// <summary>
    /// Returns the character <paramref name="offset"/> positions ahead of the cursor without
    /// consuming it, or an empty optional when that position is past the end of the input.
    /// </summary>
    private Optional<char> Peek(int offset = 0)
    {
        if (_index + offset < _src.Length)
        {
            return _src[_index + offset];
        }

        return Optional<char>.Empty();
    }

    /// <summary>Advances the cursor by one character.</summary>
    private void Next()
    {
        _index++;
    }
}