...
This commit is contained in:
6
src/Syntax/Tokenization/DocumentationToken.cs
Normal file
6
src/Syntax/Tokenization/DocumentationToken.cs
Normal file
@@ -0,0 +1,6 @@
|
||||
namespace syntax.Tokenization;
|
||||
|
||||
/// <summary>
/// Token carrying the text of a documentation comment. The tokenizer emits one
/// for each <c>///</c> comment line, holding everything after the three slashes
/// up to (but not including) the line feed.
/// </summary>
public class DocumentationToken : Token
{
    /// <summary>Creates a documentation token.</summary>
    /// <param name="span">Source range covered by the token.</param>
    /// <param name="documentation">Comment text following the <c>///</c> marker.</param>
    public DocumentationToken(SourceSpan span, string documentation)
        : base(span)
    {
        Documentation = documentation;
    }

    /// <summary>Comment text exactly as passed to the constructor.</summary>
    public string Documentation { get; }
}
|
||||
6
src/Syntax/Tokenization/IdentifierToken.cs
Normal file
6
src/Syntax/Tokenization/IdentifierToken.cs
Normal file
@@ -0,0 +1,6 @@
|
||||
namespace syntax.Tokenization;
|
||||
|
||||
/// <summary>
/// Token for a name. The tokenizer emits one for any word (letters, digits and
/// underscores, not starting with a digit) that is not a keyword, a modifier,
/// or the literals <c>true</c> / <c>false</c>.
/// </summary>
public class IdentifierToken : Token
{
    /// <summary>Creates an identifier token.</summary>
    /// <param name="span">Source range covered by the token.</param>
    /// <param name="value">Identifier text.</param>
    public IdentifierToken(SourceSpan span, string value)
        : base(span)
    {
        Value = value;
    }

    /// <summary>Identifier text exactly as passed to the constructor.</summary>
    public string Value { get; }
}
|
||||
15
src/Syntax/Tokenization/LiteralToken.cs
Normal file
15
src/Syntax/Tokenization/LiteralToken.cs
Normal file
@@ -0,0 +1,15 @@
|
||||
namespace syntax.Tokenization;
|
||||
|
||||
/// <summary>
/// Token for a literal value. The value is kept as unparsed text; <see cref="Kind"/>
/// tells consumers how to interpret it.
/// </summary>
public class LiteralToken : Token
{
    /// <summary>Creates a literal token.</summary>
    /// <param name="span">Source range covered by the token.</param>
    /// <param name="kind">Category of the literal.</param>
    /// <param name="value">Literal text.</param>
    public LiteralToken(SourceSpan span, LiteralKind kind, string value)
        : base(span)
    {
        Kind = kind;
        Value = value;
    }

    /// <summary>Category of the literal.</summary>
    public LiteralKind Kind { get; }

    /// <summary>
    /// Literal text. For strings the tokenizer stores the characters between the
    /// quotes (quotes excluded); for the other kinds it stores the source text as written.
    /// </summary>
    public string Value { get; }
}
|
||||
|
||||
/// <summary>Categories of literal recognised by the tokenizer.</summary>
public enum LiteralKind
{
    /// <summary>A run of digits without a period.</summary>
    Integer,

    /// <summary>A run of digits containing exactly one period.</summary>
    Float,

    /// <summary>Text enclosed in double quotes.</summary>
    String,

    /// <summary>The word <c>true</c> or <c>false</c>.</summary>
    Bool,
}
|
||||
12
src/Syntax/Tokenization/ModifierToken.cs
Normal file
12
src/Syntax/Tokenization/ModifierToken.cs
Normal file
@@ -0,0 +1,12 @@
|
||||
namespace syntax.Tokenization;
|
||||
|
||||
/// <summary>
/// Token for a declaration modifier. The tokenizer emits one for the words
/// <c>export</c> and <c>extern</c>.
/// </summary>
public class ModifierToken : Token
{
    /// <summary>Creates a modifier token.</summary>
    /// <param name="span">Source range covered by the token.</param>
    /// <param name="modifier">Which modifier was written.</param>
    public ModifierToken(SourceSpan span, Modifier modifier)
        : base(span)
    {
        Modifier = modifier;
    }

    /// <summary>The modifier this token represents.</summary>
    public Modifier Modifier { get; }
}
|
||||
|
||||
/// <summary>Modifier words recognised by the tokenizer.</summary>
public enum Modifier
{
    /// <summary>The <c>extern</c> modifier.</summary>
    Extern,

    /// <summary>The <c>export</c> modifier.</summary>
    Export,
}
|
||||
47
src/Syntax/Tokenization/SymbolToken.cs
Normal file
47
src/Syntax/Tokenization/SymbolToken.cs
Normal file
@@ -0,0 +1,47 @@
|
||||
namespace syntax.Tokenization;
|
||||
|
||||
/// <summary>
/// Token for a keyword, punctuation mark or operator; see <see cref="Tokenization.Symbol"/>
/// for the full set.
/// </summary>
public class SymbolToken : Token
{
    /// <summary>Creates a symbol token.</summary>
    /// <param name="span">Source range covered by the token.</param>
    /// <param name="symbol">Which symbol was written.</param>
    public SymbolToken(SourceSpan span, Symbol symbol)
        : base(span)
    {
        Symbol = symbol;
    }

    /// <summary>The symbol this token represents.</summary>
    public Symbol Symbol { get; }
}
|
||||
|
||||
/// <summary>
/// Keywords, punctuation and operators recognised by the tokenizer.
/// Members rely on implicit numbering, so their declaration order determines
/// their numeric values; append new members rather than inserting them.
/// </summary>
public enum Symbol
{
    /// <summary>Keyword <c>func</c>.</summary>
    Func,

    /// <summary>Keyword <c>return</c>.</summary>
    Return,

    /// <summary>Keyword <c>if</c>.</summary>
    If,

    /// <summary>Keyword <c>else</c>.</summary>
    Else,

    /// <summary>Keyword <c>while</c>.</summary>
    While,

    /// <summary>Keyword <c>break</c>.</summary>
    Break,

    /// <summary>Keyword <c>continue</c>.</summary>
    Continue,

    /// <summary><c>;</c></summary>
    Semicolon,

    /// <summary><c>:</c></summary>
    Colon,

    /// <summary><c>(</c></summary>
    OpenParen,

    /// <summary><c>)</c></summary>
    CloseParen,

    /// <summary><c>{</c></summary>
    OpenBrace,

    /// <summary><c>}</c></summary>
    CloseBrace,

    /// <summary><c>[</c></summary>
    OpenBracket,

    /// <summary><c>]</c></summary>
    CloseBracket,

    /// <summary><c>,</c></summary>
    Comma,

    /// <summary><c>.</c></summary>
    Period,

    /// <summary><c>=</c></summary>
    Assign,

    /// <summary><c>!</c></summary>
    Bang,

    /// <summary><c>==</c></summary>
    Equal,

    /// <summary><c>!=</c></summary>
    NotEqual,

    /// <summary><c>&lt;</c></summary>
    LessThan,

    /// <summary><c>&lt;=</c></summary>
    LessThanOrEqual,

    /// <summary><c>&gt;</c></summary>
    GreaterThan,

    /// <summary><c>&gt;=</c></summary>
    GreaterThanOrEqual,

    /// <summary><c>+</c></summary>
    Plus,

    /// <summary><c>-</c></summary>
    Minus,

    /// <summary><c>*</c></summary>
    Star,

    /// <summary><c>/</c></summary>
    ForwardSlash,

    /// <summary>Keyword <c>struct</c>.</summary>
    Struct,

    /// <summary><c>^</c></summary>
    Caret,

    /// <summary><c>&amp;</c></summary>
    Ampersand,

    /// <summary><c>::</c></summary>
    DoubleColon,

    /// <summary>Keyword <c>namespace</c>.</summary>
    Namespace,

    /// <summary>Keyword <c>let</c>.</summary>
    Let,

    /// <summary>Keyword <c>alloc</c>.</summary>
    Alloc,

    /// <summary>Keyword <c>calls</c>.</summary>
    Calls,
}
|
||||
6
src/Syntax/Tokenization/Token.cs
Normal file
6
src/Syntax/Tokenization/Token.cs
Normal file
@@ -0,0 +1,6 @@
|
||||
namespace syntax.Tokenization;
|
||||
|
||||
/// <summary>
/// Base type of every token produced by the tokenizer. It records only where
/// the token came from; derived types add the token's payload.
/// </summary>
public abstract class Token
{
    /// <summary>Initialises the source range shared by all token kinds.</summary>
    /// <param name="span">Source range covered by the token.</param>
    public Token(SourceSpan span)
    {
        Span = span;
    }

    /// <summary>Source range covered by the token.</summary>
    public SourceSpan Span { get; }
}
|
||||
279
src/Syntax/Tokenization/Tokenizer.cs
Normal file
279
src/Syntax/Tokenization/Tokenizer.cs
Normal file
@@ -0,0 +1,279 @@
|
||||
using System.Text;
using common;
using syntax.Diagnostics;
|
||||
|
||||
namespace syntax.Tokenization;
|
||||
|
||||
/// <summary>
/// Splits a <see cref="SourceText"/> into a flat list of <see cref="Token"/>s.
/// </summary>
/// <remarks>
/// All cursor state lives in a private <see cref="Lexer"/> instance created per
/// call, so separate calls to <see cref="Tokenize"/> share no mutable state.
/// </remarks>
public static class Tokenizer
{
    /// <summary>Reserved words emitted as <see cref="SymbolToken"/>s.</summary>
    private static readonly Dictionary<string, Symbol> Keywords = new()
    {
        ["namespace"] = Symbol.Namespace,
        ["func"] = Symbol.Func,
        ["if"] = Symbol.If,
        ["else"] = Symbol.Else,
        ["while"] = Symbol.While,
        ["break"] = Symbol.Break,
        ["continue"] = Symbol.Continue,
        ["return"] = Symbol.Return,
        ["alloc"] = Symbol.Alloc,
        ["struct"] = Symbol.Struct,
        ["let"] = Symbol.Let,
        ["calls"] = Symbol.Calls,
    };

    /// <summary>Reserved words emitted as <see cref="ModifierToken"/>s.</summary>
    private static readonly Dictionary<string, Modifier> Modifiers = new()
    {
        ["export"] = Modifier.Export,
        ["extern"] = Modifier.Extern,
    };

    /// <summary>
    /// Multi-character symbols. These are tried before <see cref="Chars"/> so that,
    /// for example, <c>==</c> is not split into two <c>=</c> tokens.
    /// </summary>
    private static readonly (string Text, Symbol Symbol)[] Chains =
    [
        ("==", Symbol.Equal),
        ("!=", Symbol.NotEqual),
        ("<=", Symbol.LessThanOrEqual),
        (">=", Symbol.GreaterThanOrEqual),
        ("::", Symbol.DoubleColon),
    ];

    /// <summary>Single-character symbols.</summary>
    private static readonly Dictionary<char, Symbol> Chars = new()
    {
        [';'] = Symbol.Semicolon,
        [':'] = Symbol.Colon,
        ['('] = Symbol.OpenParen,
        [')'] = Symbol.CloseParen,
        ['{'] = Symbol.OpenBrace,
        ['}'] = Symbol.CloseBrace,
        ['['] = Symbol.OpenBracket,
        [']'] = Symbol.CloseBracket,
        [','] = Symbol.Comma,
        ['.'] = Symbol.Period,
        ['='] = Symbol.Assign,
        ['<'] = Symbol.LessThan,
        ['>'] = Symbol.GreaterThan,
        ['+'] = Symbol.Plus,
        ['-'] = Symbol.Minus,
        ['*'] = Symbol.Star,
        ['/'] = Symbol.ForwardSlash,
        ['!'] = Symbol.Bang,
        ['^'] = Symbol.Caret,
        ['&'] = Symbol.Ampersand,
    };

    /// <summary>Tokenizes the whole of <paramref name="sourceText"/>.</summary>
    /// <param name="sourceText">Text to tokenize.</param>
    /// <returns>
    /// A result wrapping the tokens in source order. The diagnostics list passed to
    /// the result is always empty; malformed input is reported by throwing instead.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="sourceText"/> is null.</exception>
    /// <exception cref="Exception">
    /// The text contains an unknown character, an unclosed string literal, or a
    /// numeric literal with more than one period.
    /// </exception>
    public static DiagnosticsResult<List<Token>> Tokenize(SourceText sourceText)
    {
        ArgumentNullException.ThrowIfNull(sourceText);

        var lexer = new Lexer(sourceText);
        List<Token> tokens = [];

        while (lexer.ParseToken().TryGetValue(out var token))
        {
            tokens.Add(token);
        }

        return new DiagnosticsResult<List<Token>>([], tokens);
    }

    /// <summary>Cursor over one source text; holds all state for a single tokenization run.</summary>
    private sealed class Lexer(SourceText sourceText)
    {
        private readonly SourceText _sourceText = sourceText;

        /// <summary>Offset of the next unread character. May run one past the end of the content.</summary>
        private int _index;

        // Resume point for CreateLocation: line/column reached after scanning
        // the first _scanIndex characters of the content.
        private int _scanIndex;
        private int _scanLine = 1;
        private int _scanColumn = 1;

        /// <summary>
        /// Reads the next token, skipping whitespace and <c>//</c> comments.
        /// </summary>
        /// <returns>The token, or an empty optional at end of input.</returns>
        public Optional<Token> ParseToken()
        {
            // Loop rather than recurse after a plain comment, so long runs of
            // comment lines cannot grow the call stack.
            while (true)
            {
                ConsumeWhitespace();
                var startIndex = _index;

                if (!Peek().TryGetValue(out var current))
                {
                    return Optional<Token>.Empty();
                }

                if (current == '/' && Peek(1).TryGetValue(out var following) && following == '/')
                {
                    Next();
                    Next();

                    if (Peek().TryGetValue(out var third) && third == '/')
                    {
                        Next();
                        var documentation = ReadRestOfLine();
                        return new DocumentationToken(CreateSpan(startIndex), documentation);
                    }

                    SkipRestOfLine();
                    continue;
                }

                if (char.IsLetter(current) || current == '_')
                {
                    return ParseWord(startIndex);
                }

                if (char.IsDigit(current))
                {
                    return ParseNumber(startIndex);
                }

                foreach (var (text, symbol) in Chains)
                {
                    if (!Matches(text))
                    {
                        continue;
                    }

                    for (var i = 0; i < text.Length; i++)
                    {
                        Next();
                    }

                    return new SymbolToken(CreateSpan(startIndex), symbol);
                }

                if (Chars.TryGetValue(current, out var charSymbol))
                {
                    Next();
                    return new SymbolToken(CreateSpan(startIndex), charSymbol);
                }

                if (current == '"')
                {
                    return ParseString(startIndex);
                }

                throw new Exception($"Unknown character {current}");
            }
        }

        /// <summary>Advances past any run of whitespace characters.</summary>
        private void ConsumeWhitespace()
        {
            while (Peek().TryGetValue(out var character) && char.IsWhiteSpace(character))
            {
                Next();
            }
        }

        /// <summary>Advances past the rest of the current line, including its line feed.</summary>
        private void SkipRestOfLine()
        {
            while (Peek().TryGetValue(out var character) && character != '\n')
            {
                Next();
            }

            // Consumes the line feed; at end of input this moves one past the end,
            // which Peek and CreateLocation both tolerate.
            Next();
        }

        /// <summary>
        /// Reads up to the next line feed, consumes the line feed, and returns the
        /// text read (line feed excluded; a preceding carriage return is kept).
        /// </summary>
        private string ReadRestOfLine()
        {
            var line = new StringBuilder();

            while (Peek().TryGetValue(out var character) && character != '\n')
            {
                line.Append(character);
                Next();
            }

            Next();
            return line.ToString();
        }

        /// <summary>
        /// Reads a run of letters, digits and underscores and classifies it as a
        /// keyword, modifier, boolean literal or identifier, in that order.
        /// </summary>
        private Token ParseWord(int startIndex)
        {
            var word = new StringBuilder();

            while (Peek().TryGetValue(out var character) && (char.IsLetterOrDigit(character) || character == '_'))
            {
                word.Append(character);
                Next();
            }

            var text = word.ToString();

            if (Keywords.TryGetValue(text, out var keyword))
            {
                return new SymbolToken(CreateSpan(startIndex), keyword);
            }

            if (Modifiers.TryGetValue(text, out var modifier))
            {
                return new ModifierToken(CreateSpan(startIndex), modifier);
            }

            if (text is "true" or "false")
            {
                return new LiteralToken(CreateSpan(startIndex), LiteralKind.Bool, text);
            }

            return new IdentifierToken(CreateSpan(startIndex), text);
        }

        /// <summary>
        /// Reads a run of digits with at most one period. A period makes the
        /// literal a float; a second period is an error.
        /// </summary>
        private Token ParseNumber(int startIndex)
        {
            var isFloat = false;
            var digits = new StringBuilder();

            while (Peek().TryGetValue(out var character))
            {
                if (character == '.')
                {
                    if (isFloat)
                    {
                        throw new Exception("More than one period found in float literal");
                    }

                    isFloat = true;
                }
                else if (!char.IsDigit(character))
                {
                    break;
                }

                digits.Append(character);
                Next();
            }

            var kind = isFloat ? LiteralKind.Float : LiteralKind.Integer;
            return new LiteralToken(CreateSpan(startIndex), kind, digits.ToString());
        }

        /// <summary>
        /// Reads a double-quoted string. The cursor must be on the opening quote.
        /// No escape sequences are interpreted; the first following quote ends the string.
        /// </summary>
        private Token ParseString(int startIndex)
        {
            Next(); // opening quote
            var value = new StringBuilder();

            while (true)
            {
                if (!Peek().TryGetValue(out var character))
                {
                    throw new Exception("Unclosed string literal");
                }

                Next();

                if (character == '"')
                {
                    break;
                }

                value.Append(character);
            }

            return new LiteralToken(CreateSpan(startIndex), LiteralKind.String, value.ToString());
        }

        /// <summary>Returns whether the unread input starts with <paramref name="text"/>.</summary>
        private bool Matches(string text)
        {
            for (var i = 0; i < text.Length; i++)
            {
                if (!Peek(i).TryGetValue(out var character) || character != text[i])
                {
                    return false;
                }
            }

            return true;
        }

        /// <summary>
        /// Converts a character offset into a 1-based line/column location.
        /// </summary>
        /// <remarks>
        /// Offsets are requested in non-decreasing order during a run, so scanning
        /// resumes from the previous offset instead of restarting at 0 each time.
        /// An earlier offset simply restarts the scan, giving the same answer.
        /// NOTE(review): offsets at or past the end of the content are clamped to
        /// the last character, so a token ending at end of input gets the location
        /// of the final character rather than one past it - confirm this is intended.
        /// </remarks>
        private SourceLocation CreateLocation(int index)
        {
            var content = _sourceText.Content;
            var target = Math.Min(index, content.Length - 1);

            if (target < _scanIndex)
            {
                _scanIndex = 0;
                _scanLine = 1;
                _scanColumn = 1;
            }

            while (_scanIndex < target)
            {
                if (content[_scanIndex] == '\n')
                {
                    _scanColumn = 1;
                    _scanLine += 1;
                }
                else
                {
                    _scanColumn += 1;
                }

                _scanIndex++;
            }

            return new SourceLocation(_scanLine, _scanColumn);
        }

        /// <summary>Builds the span from <paramref name="startIndex"/> to the current cursor position.</summary>
        private SourceSpan CreateSpan(int startIndex)
        {
            // Locate the start first so the location scan only ever moves forward.
            var start = CreateLocation(startIndex);
            var end = CreateLocation(_index);
            return new SourceSpan(_sourceText, start, end);
        }

        /// <summary>
        /// Returns the character <paramref name="offset"/> positions ahead of the
        /// cursor without consuming it, or an empty optional past the end of the content.
        /// </summary>
        private Optional<char> Peek(int offset = 0)
        {
            var content = _sourceText.Content;
            var position = _index + offset;

            if (position < content.Length)
            {
                return content[position];
            }

            return Optional<char>.Empty();
        }

        /// <summary>Advances the cursor by one character.</summary>
        private void Next()
        {
            _index++;
        }
    }
}
|
||||
Reference in New Issue
Block a user