386 lines
11 KiB
C#
386 lines
11 KiB
C#
using NubLang.Diagnostics;
|
|
|
|
namespace NubLang.Syntax;
|
|
|
|
/// <summary>
/// Splits source text into <see cref="Token"/>s, collecting a <see cref="Diagnostic"/> for every
/// piece of input that cannot be tokenized instead of aborting on the first error.
/// </summary>
public sealed class Tokenizer
{
    /// <summary>Length of the longest keyword/operator spelling ("continue").</summary>
    private const int MaxSymbolLength = 8;

    private string _fileName = null!;
    private string _content = null!;
    private int _index;
    private int _line = 1;
    private int _column = 1;

    /// <summary>Diagnostics produced by the most recent call to <see cref="Tokenize"/>.</summary>
    public List<Diagnostic> Diagnostics { get; set; } = new(16);

    /// <summary>Tokens produced by the most recent call to <see cref="Tokenize"/>.</summary>
    public List<Token> Tokens { get; set; } = new(256);

    /// <summary>
    /// Tokenizes <paramref name="content"/>, replacing <see cref="Tokens"/> and
    /// <see cref="Diagnostics"/> with fresh lists. Whitespace and <c>//</c> line comments are skipped.
    /// </summary>
    /// <param name="fileName">File name recorded in every span and diagnostic.</param>
    /// <param name="content">The source text to tokenize.</param>
    public void Tokenize(string fileName, string content)
    {
        _fileName = fileName;
        _content = content;

        Diagnostics = [];
        Tokens = [];

        _index = 0;
        _line = 1;
        _column = 1;

        while (_index < _content.Length)
        {
            try
            {
                var current = _content[_index];

                if (char.IsWhiteSpace(current))
                {
                    if (current == '\n')
                    {
                        _line += 1;
                        // Next() below bumps this to 1, the first column of the new line.
                        _column = 0;
                    }

                    Next();
                    continue;
                }

                var isLineComment = current == '/'
                                    && _index + 1 < _content.Length
                                    && _content[_index + 1] == '/';
                if (isLineComment)
                {
                    Next(2);

                    // Stop ON the newline so the whitespace branch above does the line bookkeeping.
                    while (_index < _content.Length && _content[_index] != '\n')
                    {
                        Next();
                    }

                    continue;
                }

                Tokens.Add(ParseToken(current, _line, _column));
            }
            catch (CompileException e)
            {
                Diagnostics.Add(e.Diagnostic);

                // Skip the offending character to resynchronize. A newline must NOT be skipped here:
                // it has to go through the whitespace branch, otherwise _line/_column are never
                // updated and every later token is reported on the wrong line. Whenever the cursor
                // sits on a newline at this point, the failed parse has already consumed at least
                // one character, so the loop still makes progress.
                if (_index >= _content.Length || _content[_index] != '\n')
                {
                    Next();
                }
            }
        }
    }

    /// <summary>
    /// Parses the single token starting at the current position.
    /// </summary>
    /// <param name="current">The character at the current position.</param>
    /// <param name="lineStart">Line on which the token starts.</param>
    /// <param name="columnStart">Column at which the token starts.</param>
    /// <exception cref="CompileException">The input at the current position is not a valid token.</exception>
    private Token ParseToken(char current, int lineStart, int columnStart)
    {
        if (char.IsDigit(current))
        {
            return ParseNumber(lineStart, columnStart);
        }

        if (current == '"')
        {
            return ParseString(lineStart, columnStart);
        }

        // note(nub31): Look for keywords (longest first in case a keyword fits partially in a larger keyword)
        for (var length = MaxSymbolLength; length >= 1; length--)
        {
            if (TryMatchSymbol(length, lineStart, columnStart, out var token))
            {
                return token;
            }
        }

        if (char.IsLetter(current) || current == '_')
        {
            return ParseIdentifier(lineStart, columnStart);
        }

        throw new CompileException(Diagnostic
            .Error($"Unknown token '{current}'")
            .At(_fileName, lineStart, columnStart)
            .Build());
    }

    /// <summary>
    /// Parses an integer literal (decimal, <c>0x</c> hex, <c>0b</c> binary) or a float literal.
    /// </summary>
    /// <exception cref="CompileException">
    /// A <c>0x</c>/<c>0b</c> prefix has no digits, or a literal contains more than one period.
    /// </exception>
    private Token ParseNumber(int lineStart, int columnStart)
    {
        var start = _index;
        var hasPrefixChar = _content[_index] == '0' && _index + 1 < _content.Length;

        // note(nub31): 0xFFFFFF
        if (hasPrefixChar && _content[_index + 1] == 'x')
        {
            return ParsePrefixedInt(start, lineStart, columnStart, 16, Uri.IsHexDigit,
                "Invalid hex literal, no digits found");
        }

        // note(nub31): 0b11001100
        if (hasPrefixChar && _content[_index + 1] == 'b')
        {
            return ParsePrefixedInt(start, lineStart, columnStart, 2, static c => c is '0' or '1',
                "Invalid binary literal, no digits found");
        }

        // note(nub31): 23/23.5
        var isFloat = false;
        while (_index < _content.Length)
        {
            var next = _content[_index];

            if (next == '.')
            {
                if (isFloat)
                {
                    throw new CompileException(Diagnostic
                        .Error("More than one period found in float literal")
                        .At(_fileName, _line, _column)
                        .Build());
                }

                isFloat = true;
            }
            else if (!char.IsDigit(next))
            {
                break;
            }

            Next();
        }

        var buffer = _content.Substring(start, _index - start);
        var span = CreateSpan(lineStart, columnStart);

        return isFloat
            ? new FloatLiteralToken(span, buffer)
            : new IntLiteralToken(span, buffer, 10);
    }

    /// <summary>
    /// Parses an integer literal with a two-character base prefix. The token text keeps the prefix.
    /// </summary>
    /// <param name="start">Index of the leading '0' of the prefix.</param>
    /// <param name="lineStart">Line on which the literal starts.</param>
    /// <param name="columnStart">Column at which the literal starts.</param>
    /// <param name="numberBase">Base passed on to the <see cref="IntLiteralToken"/>.</param>
    /// <param name="isDigit">Decides whether a character is a digit of this base.</param>
    /// <param name="noDigitsMessage">Error message used when no digit follows the prefix.</param>
    /// <exception cref="CompileException">No digit follows the prefix.</exception>
    private IntLiteralToken ParsePrefixedInt(
        int start,
        int lineStart,
        int columnStart,
        int numberBase,
        Func<char, bool> isDigit,
        string noDigitsMessage)
    {
        Next(2);
        var digitStart = _index;

        while (_index < _content.Length && isDigit(_content[_index]))
        {
            Next();
        }

        if (_index == digitStart)
        {
            throw new CompileException(Diagnostic
                .Error(noDigitsMessage)
                .At(_fileName, _line, _column)
                .Build());
        }

        return new IntLiteralToken(
            CreateSpan(lineStart, columnStart),
            _content.Substring(start, _index - start),
            numberBase);
    }

    /// <summary>
    /// Parses a double-quoted string literal; the token value excludes the quotes.
    /// No escape sequences are interpreted.
    /// </summary>
    /// <exception cref="CompileException">
    /// The end of the content or a newline is reached before the closing quote.
    /// </exception>
    private StringLiteralToken ParseString(int lineStart, int columnStart)
    {
        // Step over the opening quote.
        Next();
        var start = _index;

        while (true)
        {
            if (_index >= _content.Length)
            {
                throw new CompileException(Diagnostic
                    .Error("Unclosed string literal")
                    .At(_fileName, _line, _column)
                    .Build());
            }

            var next = _content[_index];

            if (next == '\n')
            {
                throw new CompileException(Diagnostic
                    .Error("Unclosed string literal (newline found)")
                    .At(_fileName, _line, _column)
                    .Build());
            }

            if (next == '"')
            {
                var buffer = _content.Substring(start, _index - start);

                // Step over the closing quote so the span covers it.
                Next();
                return new StringLiteralToken(CreateSpan(lineStart, columnStart), buffer);
            }

            Next();
        }
    }

    /// <summary>
    /// Tries to read a bool literal, keyword or operator of exactly <paramref name="length"/>
    /// characters at the current position. The position only advances on success.
    /// </summary>
    /// <param name="length">Exact number of characters to match.</param>
    /// <param name="lineStart">Line on which the token starts.</param>
    /// <param name="columnStart">Column at which the token starts.</param>
    /// <param name="token">The matched token; only meaningful when the method returns true.</param>
    /// <returns>True when a token was matched and consumed.</returns>
    private bool TryMatchSymbol(int length, int lineStart, int columnStart, out Token token)
    {
        token = null!;

        if (_index + length > _content.Length)
        {
            return false;
        }

        var span = _content.AsSpan(_index, length);

        bool? boolLiteral = span switch
        {
            "true" => true,
            "false" => false,
            _ => null,
        };

        var symbol = boolLiteral is null ? MatchSymbol(span) : Symbol.None;

        if (boolLiteral is null && symbol == Symbol.None)
        {
            return false;
        }

        // A word-like match must end at an identifier boundary; otherwise it is just the prefix of
        // a longer identifier ("iffy", "returned", "trueValue") and must not be split.
        if (char.IsLetter(span[0]))
        {
            var nextIndex = _index + length;
            if (nextIndex < _content.Length && IsIdentifierPart(_content[nextIndex]))
            {
                return false;
            }
        }

        Next(length);

        if (boolLiteral is { } value)
        {
            token = new BoolLiteralToken(CreateSpan(lineStart, columnStart), value);
        }
        else
        {
            token = new SymbolToken(CreateSpan(lineStart, columnStart), symbol);
        }

        return true;
    }

    /// <summary>
    /// Maps an exact keyword or operator spelling to its <see cref="Symbol"/>,
    /// or <see cref="Symbol.None"/> when the text is not one.
    /// </summary>
    private static Symbol MatchSymbol(ReadOnlySpan<char> text) => text switch
    {
        "continue" => Symbol.Continue,

        "return" => Symbol.Return,
        "struct" => Symbol.Struct,
        "extern" => Symbol.Extern,
        "packed" => Symbol.Packed,
        "module" => Symbol.Module,
        "export" => Symbol.Export,

        "break" => Symbol.Break,
        "while" => Symbol.While,
        "defer" => Symbol.Defer,

        "func" => Symbol.Func,
        "else" => Symbol.Else,
        "enum" => Symbol.Enum,

        "for" => Symbol.For,
        "let" => Symbol.Let,

        "if" => Symbol.If,
        "in" => Symbol.In,
        "==" => Symbol.Equal,
        "!=" => Symbol.NotEqual,
        "<=" => Symbol.LessThanOrEqual,
        ">=" => Symbol.GreaterThanOrEqual,
        "<<" => Symbol.LeftShift,
        ">>" => Symbol.RightShift,
        "&&" => Symbol.And,
        "||" => Symbol.Or,
        "::" => Symbol.DoubleColon,
        // NOTE(review): this spelling also matches an identifier `x` directly followed by '|'
        // (e.g. `x||y` lexes as XOr, Pipe, y) - confirm this is the intended XOr operator.
        "x|" => Symbol.XOr,

        ":" => Symbol.Colon,
        "(" => Symbol.OpenParen,
        ")" => Symbol.CloseParen,
        "{" => Symbol.OpenBrace,
        "}" => Symbol.CloseBrace,
        "[" => Symbol.OpenBracket,
        "]" => Symbol.CloseBracket,
        "," => Symbol.Comma,
        "." => Symbol.Period,
        "=" => Symbol.Assign,
        "<" => Symbol.LessThan,
        ">" => Symbol.GreaterThan,
        "+" => Symbol.Plus,
        "-" => Symbol.Minus,
        "*" => Symbol.Star,
        "/" => Symbol.ForwardSlash,
        "!" => Symbol.Bang,
        "^" => Symbol.Caret,
        "&" => Symbol.Ampersand,
        ";" => Symbol.Semi,
        "%" => Symbol.Percent,
        "|" => Symbol.Pipe,
        "@" => Symbol.At,
        "?" => Symbol.QuestionMark,

        _ => Symbol.None,
    };

    /// <summary>
    /// Parses an identifier: the run of letters, digits and underscores starting at the current position.
    /// </summary>
    private IdentifierToken ParseIdentifier(int lineStart, int columnStart)
    {
        var start = _index;

        while (_index < _content.Length && IsIdentifierPart(_content[_index]))
        {
            Next();
        }

        return new IdentifierToken(CreateSpan(lineStart, columnStart), _content.Substring(start, _index - start));
    }

    /// <summary>True for characters that may appear inside an identifier.</summary>
    private static bool IsIdentifierPart(char c) => char.IsLetterOrDigit(c) || c == '_';

    /// <summary>
    /// Builds a span from the given start location to the current line/column.
    /// </summary>
    private SourceSpan CreateSpan(int lineStart, int columnStart)
    {
        var from = new SourceLocation(lineStart, columnStart);
        var to = new SourceLocation(_line, _column);
        return new SourceSpan(_fileName, from, to);
    }

    /// <summary>
    /// Advances the read position and the column by <paramref name="count"/> characters.
    /// Line tracking is the caller's responsibility.
    /// </summary>
    private void Next(int count = 1)
    {
        _index += count;
        _column += count;
    }
}