Perf improvements in tokenizer
This commit is contained in:
@@ -1,9 +1,7 @@
|
||||
using NubLang.Code;
|
||||
|
||||
namespace NubLang.CLI;
|
||||
|
||||
public class Options
|
||||
{
|
||||
public string? OutputPath { get; set; }
|
||||
public List<SourceFile> Files { get; } = [];
|
||||
public List<string> Files { get; } = [];
|
||||
}
|
||||
@@ -1,6 +1,5 @@
|
||||
using System.Diagnostics;
|
||||
using NubLang.CLI;
|
||||
using NubLang.Code;
|
||||
using NubLang.Diagnostics;
|
||||
using NubLang.Generation.QBE;
|
||||
using NubLang.Modules;
|
||||
@@ -32,7 +31,7 @@ for (var i = 0; i < args.Length; i++)
|
||||
}
|
||||
default:
|
||||
{
|
||||
options.Files.Add(new SourceFile(arg));
|
||||
options.Files.Add(arg);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -43,7 +42,7 @@ sw.Restart();
|
||||
|
||||
foreach (var file in options.Files)
|
||||
{
|
||||
if (!File.Exists(file.Path))
|
||||
if (!File.Exists(file))
|
||||
{
|
||||
Console.Error.WriteLine($"File '{file}' does not exist");
|
||||
return 1;
|
||||
@@ -58,18 +57,18 @@ var diagnostics = new List<Diagnostic>();
|
||||
var syntaxTrees = new List<SyntaxTree>();
|
||||
foreach (var file in options.Files)
|
||||
{
|
||||
var tokenizer = new Tokenizer(file);
|
||||
var tokens = tokenizer.Tokenize().ToList();
|
||||
diagnostics.AddRange(tokenizer.GetDiagnostics());
|
||||
var tokenizer = new Tokenizer(file, File.ReadAllText(file));
|
||||
tokenizer.Tokenize();
|
||||
diagnostics.AddRange(tokenizer.Diagnostics);
|
||||
|
||||
Console.WriteLine($"Tokenize: {Path.GetFileName(file.Path)}: {sw.ElapsedMilliseconds}ms");
|
||||
Console.WriteLine($" Tokenize: {Path.GetFileName(file)}: {sw.ElapsedMilliseconds}ms");
|
||||
sw.Restart();
|
||||
|
||||
var parser = new Parser();
|
||||
var syntaxTree = parser.Parse(tokens);
|
||||
diagnostics.AddRange(parser.GetDiagnostics());
|
||||
var syntaxTree = parser.Parse(tokenizer.Tokens);
|
||||
diagnostics.AddRange(parser.Diagnostics);
|
||||
|
||||
Console.WriteLine($"Parse: {Path.GetFileName(file.Path)}: {sw.ElapsedMilliseconds}ms");
|
||||
Console.WriteLine($" Parse: {Path.GetFileName(file)}: {sw.ElapsedMilliseconds}ms");
|
||||
sw.Restart();
|
||||
|
||||
syntaxTrees.Add(syntaxTree);
|
||||
@@ -91,7 +90,7 @@ foreach (var syntaxTree in syntaxTrees)
|
||||
var typeChecker = new TypeChecker(syntaxTree, moduleRepository);
|
||||
typeChecker.Check();
|
||||
|
||||
Console.WriteLine($"Type check {syntaxTree.Metadata.ModuleName}: {sw.ElapsedMilliseconds}ms");
|
||||
Console.WriteLine($" Type check {syntaxTree.Metadata.ModuleName}: {sw.ElapsedMilliseconds}ms");
|
||||
sw.Restart();
|
||||
|
||||
definitions.AddRange(typeChecker.Definitions);
|
||||
|
||||
@@ -1,41 +0,0 @@
|
||||
namespace NubLang.Code;
|
||||
|
||||
public class SourceFile
|
||||
{
|
||||
private string? _content;
|
||||
|
||||
public SourceFile(string path)
|
||||
{
|
||||
Path = path ?? throw new ArgumentNullException(nameof(path));
|
||||
}
|
||||
|
||||
public string Path { get; }
|
||||
|
||||
public string GetText() => _content ??= File.ReadAllText(Path);
|
||||
public override string ToString() => Path;
|
||||
|
||||
public override bool Equals(object? obj)
|
||||
{
|
||||
return obj is SourceFile other && other.Path == Path;
|
||||
}
|
||||
|
||||
public override int GetHashCode()
|
||||
{
|
||||
return HashCode.Combine(typeof(SourceFile), Path);
|
||||
}
|
||||
|
||||
public static bool operator ==(SourceFile? left, SourceFile? right) => Equals(left, right);
|
||||
public static bool operator !=(SourceFile? left, SourceFile? right) => !Equals(left, right);
|
||||
}
|
||||
|
||||
public class SourceFileSpan
|
||||
{
|
||||
public SourceFileSpan(SourceFile sourceFile, SourceSpan span)
|
||||
{
|
||||
SourceFile = sourceFile;
|
||||
Span = span;
|
||||
}
|
||||
|
||||
public SourceFile SourceFile { get; }
|
||||
public SourceSpan Span { get; }
|
||||
}
|
||||
@@ -2,8 +2,6 @@ namespace NubLang.Code;
|
||||
|
||||
public readonly struct SourceLocation : IEquatable<SourceLocation>, IComparable<SourceLocation>
|
||||
{
|
||||
public static SourceLocation Zero => new(0, 0);
|
||||
|
||||
public SourceLocation(int line, int column)
|
||||
{
|
||||
Line = line;
|
||||
|
||||
@@ -2,34 +2,33 @@ namespace NubLang.Code;
|
||||
|
||||
public readonly struct SourceSpan : IEquatable<SourceSpan>, IComparable<SourceSpan>
|
||||
{
|
||||
public static SourceSpan Zero => new(SourceLocation.Zero, SourceLocation.Zero);
|
||||
|
||||
public static SourceSpan Merge(params IEnumerable<SourceSpan> spans)
|
||||
{
|
||||
var spanArray = spans as SourceSpan[] ?? spans.ToArray();
|
||||
|
||||
if (spanArray.Length == 0)
|
||||
{
|
||||
return Zero;
|
||||
return new SourceSpan(string.Empty, new SourceLocation(0, 0), new SourceLocation(0, 0));
|
||||
}
|
||||
|
||||
var minStart = spanArray.Min(s => s.Start);
|
||||
var maxEnd = spanArray.Max(s => s.End);
|
||||
|
||||
return new SourceSpan(minStart, maxEnd);
|
||||
return new SourceSpan(spanArray[0].FilePath, minStart, maxEnd);
|
||||
}
|
||||
|
||||
public SourceSpan(SourceLocation start, SourceLocation end)
|
||||
public SourceSpan(string filePath, SourceLocation start, SourceLocation end)
|
||||
{
|
||||
if (start > end)
|
||||
{
|
||||
throw new ArgumentException("Start location cannot be after end location");
|
||||
}
|
||||
|
||||
FilePath = filePath;
|
||||
Start = start;
|
||||
End = end;
|
||||
}
|
||||
|
||||
public string FilePath { get; }
|
||||
public SourceLocation Start { get; }
|
||||
public SourceLocation End { get; }
|
||||
|
||||
@@ -37,15 +36,15 @@ public readonly struct SourceSpan : IEquatable<SourceSpan>, IComparable<SourceSp
|
||||
{
|
||||
if (Start == End)
|
||||
{
|
||||
return $"{Start}";
|
||||
return $"{FilePath}:{Start}";
|
||||
}
|
||||
|
||||
if (Start.Line == End.Line)
|
||||
{
|
||||
return Start.Column == End.Column ? $"{Start}" : $"{Start.Line}:{Start.Column}-{End.Column}";
|
||||
return Start.Column == End.Column ? $"{FilePath}:{Start}" : $"{FilePath}:{Start.Line}:{Start.Column}-{End.Column}";
|
||||
}
|
||||
|
||||
return $"{Start}-{End}";
|
||||
return $"{FilePath}:{Start}-{End}";
|
||||
}
|
||||
|
||||
public bool Equals(SourceSpan other) => Start == other.Start && End == other.End;
|
||||
@@ -54,7 +53,7 @@ public readonly struct SourceSpan : IEquatable<SourceSpan>, IComparable<SourceSp
|
||||
|
||||
public static bool operator ==(SourceSpan left, SourceSpan right) => Equals(left, right);
|
||||
public static bool operator !=(SourceSpan left, SourceSpan right) => !Equals(left, right);
|
||||
|
||||
|
||||
public static bool operator <(SourceSpan left, SourceSpan right) => left.CompareTo(right) < 0;
|
||||
public static bool operator <=(SourceSpan left, SourceSpan right) => left.CompareTo(right) <= 0;
|
||||
public static bool operator >(SourceSpan left, SourceSpan right) => left.CompareTo(right) > 0;
|
||||
|
||||
@@ -11,7 +11,7 @@ public class Diagnostic
|
||||
{
|
||||
private readonly DiagnosticSeverity _severity;
|
||||
private readonly string _message;
|
||||
private SourceFileSpan? _fileSpan;
|
||||
private SourceSpan? _span;
|
||||
private string? _help;
|
||||
|
||||
public DiagnosticBuilder(DiagnosticSeverity severity, string message)
|
||||
@@ -24,12 +24,7 @@ public class Diagnostic
|
||||
{
|
||||
if (node != null)
|
||||
{
|
||||
var first = node.Tokens.FirstOrDefault();
|
||||
if (first?.FileSpan != null)
|
||||
{
|
||||
var span = SourceSpan.Merge(node.Tokens.Select(x => x.FileSpan.Span));
|
||||
At(new SourceFileSpan(first.FileSpan.SourceFile, span));
|
||||
}
|
||||
_span = SourceSpan.Merge(node.Tokens.Select(x => x.Span));
|
||||
}
|
||||
|
||||
return this;
|
||||
@@ -39,29 +34,35 @@ public class Diagnostic
|
||||
{
|
||||
if (token != null)
|
||||
{
|
||||
At(token.FileSpan);
|
||||
At(token.Span);
|
||||
}
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
public DiagnosticBuilder At(SourceFileSpan? fileSpan)
|
||||
public DiagnosticBuilder At(SourceSpan? span)
|
||||
{
|
||||
if (fileSpan != null)
|
||||
if (span != null)
|
||||
{
|
||||
_fileSpan = fileSpan;
|
||||
_span = span;
|
||||
}
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
public DiagnosticBuilder At(string filePath, int line, int column)
|
||||
{
|
||||
_span = new SourceSpan(filePath, new SourceLocation(line, column), new SourceLocation(line, column));
|
||||
return this;
|
||||
}
|
||||
|
||||
public DiagnosticBuilder WithHelp(string help)
|
||||
{
|
||||
_help = help;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Diagnostic Build() => new(_severity, _message, _help, _fileSpan);
|
||||
public Diagnostic Build() => new(_severity, _message, _help, _span);
|
||||
}
|
||||
|
||||
public static DiagnosticBuilder Error(string message) => new(DiagnosticSeverity.Error, message);
|
||||
@@ -71,14 +72,14 @@ public class Diagnostic
|
||||
public DiagnosticSeverity Severity { get; }
|
||||
public string Message { get; }
|
||||
public string? Help { get; }
|
||||
public SourceFileSpan? FileSpan { get; }
|
||||
public SourceSpan? Span { get; }
|
||||
|
||||
private Diagnostic(DiagnosticSeverity severity, string message, string? help, SourceFileSpan? fileSpan)
|
||||
private Diagnostic(DiagnosticSeverity severity, string message, string? help, SourceSpan? span)
|
||||
{
|
||||
Severity = severity;
|
||||
Message = message;
|
||||
Help = help;
|
||||
FileSpan = fileSpan;
|
||||
Span = span;
|
||||
}
|
||||
|
||||
public string FormatANSI()
|
||||
@@ -93,23 +94,23 @@ public class Diagnostic
|
||||
_ => ConsoleColors.Colorize("unknown", ConsoleColors.Bold + ConsoleColors.White)
|
||||
});
|
||||
|
||||
if (FileSpan != null)
|
||||
if (Span.HasValue)
|
||||
{
|
||||
sb.Append(ConsoleColors.Colorize($" at {FileSpan.SourceFile.Path}:{FileSpan.Span}", ConsoleColors.Faint));
|
||||
sb.Append(ConsoleColors.Colorize($" at {Span.Value}", ConsoleColors.Faint));
|
||||
}
|
||||
|
||||
sb.Append(": ");
|
||||
sb.Append(ConsoleColors.Colorize(Message, ConsoleColors.BrightWhite));
|
||||
|
||||
if (FileSpan != null)
|
||||
if (Span.HasValue)
|
||||
{
|
||||
sb.AppendLine();
|
||||
var text = FileSpan.SourceFile.GetText();
|
||||
var text = File.ReadAllText(Span.Value.FilePath);
|
||||
|
||||
var lines = text.Split('\n');
|
||||
|
||||
var startLine = FileSpan.Span.Start.Line;
|
||||
var endLine = FileSpan.Span.End.Line;
|
||||
var startLine = Span.Value.Start.Line;
|
||||
var endLine = Span.Value.End.Line;
|
||||
|
||||
const int CONTEXT_LINES = 3;
|
||||
|
||||
@@ -126,8 +127,8 @@ public class Diagnostic
|
||||
sb.Append('╮');
|
||||
sb.AppendLine();
|
||||
|
||||
var tokenizer = new Tokenizer(FileSpan.SourceFile);
|
||||
var tokens = tokenizer.Tokenize().ToList();
|
||||
var tokenizer = new Tokenizer(Span.Value.FilePath, text);
|
||||
tokenizer.Tokenize();
|
||||
|
||||
for (var i = contextStartLine; i <= contextEndLine; i++)
|
||||
{
|
||||
@@ -136,7 +137,7 @@ public class Diagnostic
|
||||
sb.Append("│ ");
|
||||
sb.Append(i.ToString().PadRight(numberPadding));
|
||||
sb.Append(" │ ");
|
||||
sb.Append(ApplySyntaxHighlighting(line.PadRight(codePadding), i, tokens));
|
||||
sb.Append(ApplySyntaxHighlighting(line.PadRight(codePadding), i, tokenizer.Tokens));
|
||||
sb.Append(" │");
|
||||
sb.AppendLine();
|
||||
|
||||
@@ -147,12 +148,12 @@ public class Diagnostic
|
||||
|
||||
if (i == startLine)
|
||||
{
|
||||
markerStartColumn = FileSpan.Span.Start.Column;
|
||||
markerStartColumn = Span.Value.Start.Column;
|
||||
}
|
||||
|
||||
if (i == endLine)
|
||||
{
|
||||
markerEndColumn = FileSpan.Span.End.Column;
|
||||
markerEndColumn = Span.Value.End.Column;
|
||||
}
|
||||
|
||||
var markerLength = markerEndColumn - markerStartColumn;
|
||||
@@ -197,8 +198,8 @@ public class Diagnostic
|
||||
{
|
||||
var sb = new StringBuilder();
|
||||
var lineTokens = tokens
|
||||
.Where(t => t.FileSpan.Span.Start.Line == lineNumber)
|
||||
.OrderBy(t => t.FileSpan.Span.Start.Column)
|
||||
.Where(t => t.Span.Start.Line == lineNumber)
|
||||
.OrderBy(t => t.Span.Start.Column)
|
||||
.ToList();
|
||||
|
||||
if (lineTokens.Count == 0)
|
||||
@@ -210,8 +211,8 @@ public class Diagnostic
|
||||
|
||||
foreach (var token in lineTokens)
|
||||
{
|
||||
var tokenStart = token.FileSpan.Span.Start.Column;
|
||||
var tokenEnd = token.FileSpan.Span.End.Column;
|
||||
var tokenStart = token.Span.Start.Column;
|
||||
var tokenEnd = token.Span.End.Column;
|
||||
|
||||
if (tokenStart > currentColumn)
|
||||
{
|
||||
|
||||
@@ -7,7 +7,6 @@ namespace NubLang.Parsing;
|
||||
|
||||
public sealed class Parser
|
||||
{
|
||||
private readonly List<Diagnostic> _diagnostics = [];
|
||||
private readonly HashSet<string> _templateArguments = [];
|
||||
private List<Token> _tokens = [];
|
||||
private int _tokenIndex;
|
||||
@@ -16,14 +15,11 @@ public sealed class Parser
|
||||
private Token? CurrentToken => _tokenIndex < _tokens.Count ? _tokens[_tokenIndex] : null;
|
||||
private bool HasToken => CurrentToken != null;
|
||||
|
||||
public List<Diagnostic> GetDiagnostics()
|
||||
{
|
||||
return _diagnostics;
|
||||
}
|
||||
public List<Diagnostic> Diagnostics { get; } = [];
|
||||
|
||||
public SyntaxTree Parse(List<Token> tokens)
|
||||
{
|
||||
_diagnostics.Clear();
|
||||
Diagnostics.Clear();
|
||||
_tokens = tokens;
|
||||
_tokenIndex = 0;
|
||||
_moduleName = string.Empty;
|
||||
@@ -51,7 +47,7 @@ public sealed class Parser
|
||||
}
|
||||
catch (ParseException e)
|
||||
{
|
||||
_diagnostics.Add(e.Diagnostic);
|
||||
Diagnostics.Add(e.Diagnostic);
|
||||
while (HasToken)
|
||||
{
|
||||
if (CurrentToken is SymbolToken { Symbol: Symbol.Module or Symbol.Import })
|
||||
@@ -102,7 +98,7 @@ public sealed class Parser
|
||||
}
|
||||
catch (ParseException e)
|
||||
{
|
||||
_diagnostics.Add(e.Diagnostic);
|
||||
Diagnostics.Add(e.Diagnostic);
|
||||
while (HasToken)
|
||||
{
|
||||
if (CurrentToken is SymbolToken { Symbol: Symbol.Extern or Symbol.Func or Symbol.Struct })
|
||||
@@ -692,7 +688,7 @@ public sealed class Parser
|
||||
}
|
||||
catch (ParseException ex)
|
||||
{
|
||||
_diagnostics.Add(ex.Diagnostic);
|
||||
Diagnostics.Add(ex.Diagnostic);
|
||||
if (HasToken)
|
||||
{
|
||||
Next();
|
||||
|
||||
@@ -2,22 +2,6 @@
|
||||
|
||||
namespace NubLang.Tokenization;
|
||||
|
||||
public abstract class Token(SourceFileSpan fileSpan)
|
||||
{
|
||||
public SourceFileSpan FileSpan { get; } = fileSpan;
|
||||
}
|
||||
|
||||
public class IdentifierToken(SourceFileSpan fileSpan, string value) : Token(fileSpan)
|
||||
{
|
||||
public string Value { get; } = value;
|
||||
}
|
||||
|
||||
public class LiteralToken(SourceFileSpan fileSpan, LiteralKind kind, string value) : Token(fileSpan)
|
||||
{
|
||||
public LiteralKind Kind { get; } = kind;
|
||||
public string Value { get; } = value;
|
||||
}
|
||||
|
||||
public enum LiteralKind
|
||||
{
|
||||
Integer,
|
||||
@@ -26,11 +10,6 @@ public enum LiteralKind
|
||||
Bool
|
||||
}
|
||||
|
||||
public class SymbolToken(SourceFileSpan fileSpan, Symbol symbol) : Token(fileSpan)
|
||||
{
|
||||
public Symbol Symbol { get; } = symbol;
|
||||
}
|
||||
|
||||
public enum Symbol
|
||||
{
|
||||
Func,
|
||||
@@ -83,4 +62,12 @@ public enum Symbol
|
||||
Defer,
|
||||
At,
|
||||
Enum,
|
||||
}
|
||||
}
|
||||
|
||||
public abstract record Token(string FileName, SourceSpan Span);
|
||||
|
||||
public record IdentifierToken(string FileName, SourceSpan Span, string Value) : Token(FileName, Span);
|
||||
|
||||
public record LiteralToken(string FileName, SourceSpan Span, LiteralKind Kind, string Value) : Token(FileName, Span);
|
||||
|
||||
public record SymbolToken(string FileName, SourceSpan Span, Symbol Symbol) : Token(FileName, Span);
|
||||
@@ -68,171 +68,196 @@ public sealed class Tokenizer
|
||||
.Select(kvp => (kvp.Key, kvp.Value))
|
||||
.ToArray();
|
||||
|
||||
private readonly SourceFile _sourceFile;
|
||||
private readonly List<Diagnostic> _diagnostics = [];
|
||||
private int _index;
|
||||
private readonly string _fileName;
|
||||
private readonly string _content;
|
||||
private int _index = 0;
|
||||
private int _line = 1;
|
||||
private int _column = 1;
|
||||
|
||||
public Tokenizer(SourceFile sourceFile)
|
||||
public Tokenizer(string fileName, string content)
|
||||
{
|
||||
_sourceFile = sourceFile;
|
||||
_fileName = fileName;
|
||||
_content = content;
|
||||
}
|
||||
|
||||
public List<Diagnostic> GetDiagnostics() => _diagnostics;
|
||||
public List<Diagnostic> Diagnostics { get; } = [];
|
||||
public List<Token> Tokens { get; } = [];
|
||||
|
||||
public IEnumerable<Token> Tokenize()
|
||||
public void Tokenize()
|
||||
{
|
||||
Diagnostics.Clear();
|
||||
Tokens.Clear();
|
||||
_index = 0;
|
||||
_line = 1;
|
||||
_column = 1;
|
||||
|
||||
while (Peek() != null)
|
||||
while (Peek().HasValue)
|
||||
{
|
||||
var current = Peek()!.Value;
|
||||
if (char.IsWhiteSpace(current))
|
||||
try
|
||||
{
|
||||
Next();
|
||||
continue;
|
||||
}
|
||||
|
||||
if (current == '/' && Peek(1) == '/')
|
||||
{
|
||||
while (Peek().HasValue && Peek() != '\n')
|
||||
// Skip whitespace and increment line counter if newline
|
||||
var current = Peek()!.Value;
|
||||
if (char.IsWhiteSpace(current))
|
||||
{
|
||||
if (current is '\n')
|
||||
{
|
||||
_line += 1;
|
||||
_column = 1;
|
||||
}
|
||||
|
||||
Next();
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
var tokenStartIndex = _index;
|
||||
|
||||
if (char.IsLetter(current) || current == '_')
|
||||
{
|
||||
var buffer = string.Empty;
|
||||
|
||||
while (Peek() != null && (char.IsLetterOrDigit(Peek()!.Value) || Peek() == '_'))
|
||||
{
|
||||
buffer += Peek();
|
||||
Next();
|
||||
}
|
||||
|
||||
if (Keywords.TryGetValue(buffer, out var keywordSymbol))
|
||||
{
|
||||
yield return new SymbolToken(GetSourceFileSpan(tokenStartIndex), keywordSymbol);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (buffer is "true" or "false")
|
||||
// Skip single line comments but keep newline so next iteration increments the line counter
|
||||
if (current == '/' && Peek(1) == '/')
|
||||
{
|
||||
yield return new LiteralToken(GetSourceFileSpan(tokenStartIndex), LiteralKind.Bool, buffer);
|
||||
while (Peek() is not '\n')
|
||||
{
|
||||
Next();
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
yield return new IdentifierToken(GetSourceFileSpan(tokenStartIndex), buffer);
|
||||
continue;
|
||||
Tokens.Add(ParseToken(current, _line, _column));
|
||||
}
|
||||
|
||||
if (char.IsDigit(current))
|
||||
{
|
||||
var isFloat = false;
|
||||
var buffer = string.Empty;
|
||||
|
||||
while (Peek() != null)
|
||||
{
|
||||
var next = Peek()!.Value;
|
||||
if (next == '.')
|
||||
{
|
||||
if (isFloat)
|
||||
{
|
||||
throw new Exception("More than one period found in float literal");
|
||||
}
|
||||
|
||||
isFloat = true;
|
||||
buffer += next;
|
||||
Next();
|
||||
}
|
||||
else if (char.IsDigit(next))
|
||||
{
|
||||
buffer += next;
|
||||
Next();
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
yield return new LiteralToken(GetSourceFileSpan(tokenStartIndex), isFloat ? LiteralKind.Float : LiteralKind.Integer, buffer);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (current == '"')
|
||||
catch (TokenizerException e)
|
||||
{
|
||||
Diagnostics.Add(e.Diagnostic);
|
||||
Next();
|
||||
var buffer = string.Empty;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
while (true)
|
||||
private Token ParseToken(char current, int lineStart, int columnStart)
|
||||
{
|
||||
if (char.IsLetter(current) || current == '_')
|
||||
{
|
||||
var buffer = string.Empty;
|
||||
|
||||
while (Peek() != null && (char.IsLetterOrDigit(Peek()!.Value) || Peek() == '_'))
|
||||
{
|
||||
buffer += Peek();
|
||||
Next();
|
||||
}
|
||||
|
||||
if (Keywords.TryGetValue(buffer, out var keywordSymbol))
|
||||
{
|
||||
return new SymbolToken(_fileName, CreateSpan(lineStart, columnStart), keywordSymbol);
|
||||
}
|
||||
|
||||
if (buffer is "true" or "false")
|
||||
{
|
||||
return new LiteralToken(_fileName, CreateSpan(lineStart, columnStart), LiteralKind.Bool, buffer);
|
||||
}
|
||||
|
||||
return new IdentifierToken(_fileName, CreateSpan(lineStart, columnStart), buffer);
|
||||
}
|
||||
|
||||
if (char.IsDigit(current))
|
||||
{
|
||||
var isFloat = false;
|
||||
var buffer = string.Empty;
|
||||
|
||||
while (Peek() != null)
|
||||
{
|
||||
var next = Peek()!.Value;
|
||||
if (next == '.')
|
||||
{
|
||||
if (Peek() == null)
|
||||
if (isFloat)
|
||||
{
|
||||
throw new Exception("Unclosed string literal");
|
||||
}
|
||||
|
||||
var next = Peek()!.Value;
|
||||
if (next == '"')
|
||||
{
|
||||
Next();
|
||||
break;
|
||||
throw new TokenizerException(Diagnostic
|
||||
.Error("More than one period found in float literal")
|
||||
.At(_fileName, _line, _column)
|
||||
.Build());
|
||||
}
|
||||
|
||||
isFloat = true;
|
||||
buffer += next;
|
||||
Next();
|
||||
}
|
||||
|
||||
yield return new LiteralToken(GetSourceFileSpan(tokenStartIndex), LiteralKind.String, buffer);
|
||||
continue;
|
||||
}
|
||||
|
||||
var foundMatch = false;
|
||||
foreach (var (pattern, symbol) in OrderedSymbols)
|
||||
{
|
||||
for (var i = 0; i < pattern.Length; i++)
|
||||
else if (char.IsDigit(next))
|
||||
{
|
||||
var c = Peek(i);
|
||||
if (!c.HasValue || c.Value != pattern[i]) break;
|
||||
|
||||
if (i == pattern.Length - 1)
|
||||
{
|
||||
for (var j = 0; j <= i; j++)
|
||||
{
|
||||
Next();
|
||||
}
|
||||
|
||||
yield return new SymbolToken(GetSourceFileSpan(tokenStartIndex), symbol);
|
||||
foundMatch = true;
|
||||
break;
|
||||
}
|
||||
buffer += next;
|
||||
Next();
|
||||
}
|
||||
|
||||
if (foundMatch)
|
||||
else
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (foundMatch)
|
||||
return new LiteralToken(_fileName, CreateSpan(lineStart, columnStart), isFloat ? LiteralKind.Float : LiteralKind.Integer, buffer);
|
||||
}
|
||||
|
||||
if (current == '"')
|
||||
{
|
||||
Next();
|
||||
var buffer = string.Empty;
|
||||
|
||||
while (true)
|
||||
{
|
||||
continue;
|
||||
var next = Peek();
|
||||
if (!next.HasValue)
|
||||
{
|
||||
throw new TokenizerException(Diagnostic
|
||||
.Error("Unclosed string literal")
|
||||
.At(_fileName, _line, _column)
|
||||
.Build());
|
||||
}
|
||||
|
||||
if (next is '\n')
|
||||
{
|
||||
_line += 1;
|
||||
break;
|
||||
}
|
||||
|
||||
if (next is '"')
|
||||
{
|
||||
Next();
|
||||
break;
|
||||
}
|
||||
|
||||
buffer += next;
|
||||
Next();
|
||||
}
|
||||
|
||||
_diagnostics.Add(Diagnostic.Error($"Unknown token '{current}'").At(GetSourceFileSpan(tokenStartIndex)).Build());
|
||||
Next();
|
||||
return new LiteralToken(_fileName, CreateSpan(lineStart, columnStart), LiteralKind.String, buffer);
|
||||
}
|
||||
|
||||
foreach (var (pattern, symbol) in OrderedSymbols)
|
||||
{
|
||||
for (var i = 0; i < pattern.Length; i++)
|
||||
{
|
||||
var c = Peek(i);
|
||||
if (!c.HasValue || c.Value != pattern[i]) break;
|
||||
|
||||
if (i == pattern.Length - 1)
|
||||
{
|
||||
for (var j = 0; j <= i; j++)
|
||||
{
|
||||
Next();
|
||||
}
|
||||
|
||||
return new SymbolToken(_fileName, CreateSpan(lineStart, columnStart), symbol);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
throw new TokenizerException(Diagnostic.Error($"Unknown token '{current}'").Build());
|
||||
}
|
||||
|
||||
private SourceSpan CreateSpan(int lineStart, int columnStart)
|
||||
{
|
||||
return new SourceSpan(_fileName, new SourceLocation(lineStart, columnStart), new SourceLocation(_line, _column));
|
||||
}
|
||||
|
||||
private char? Peek(int offset = 0)
|
||||
{
|
||||
if (_index + offset < _sourceFile.GetText().Length)
|
||||
if (_index + offset < _content.Length)
|
||||
{
|
||||
return _sourceFile.GetText()[_index + offset];
|
||||
return _content[_index + offset];
|
||||
}
|
||||
|
||||
return null;
|
||||
@@ -240,34 +265,17 @@ public sealed class Tokenizer
|
||||
|
||||
private void Next()
|
||||
{
|
||||
_index++;
|
||||
_index += 1;
|
||||
_column += 1;
|
||||
}
|
||||
}
|
||||
|
||||
private SourceFileSpan GetSourceFileSpan(int tokenStartIndex)
|
||||
public class TokenizerException : Exception
|
||||
{
|
||||
public Diagnostic Diagnostic { get; }
|
||||
|
||||
public TokenizerException(Diagnostic diagnostic) : base(diagnostic.Message)
|
||||
{
|
||||
var start = CalculateSourceLocation(tokenStartIndex);
|
||||
var end = CalculateSourceLocation(_index);
|
||||
return new SourceFileSpan(_sourceFile, new SourceSpan(start, end));
|
||||
}
|
||||
|
||||
private SourceLocation CalculateSourceLocation(int index)
|
||||
{
|
||||
var line = 1;
|
||||
var column = 1;
|
||||
|
||||
for (var i = 0; i < index && i < _sourceFile.GetText().Length; i++)
|
||||
{
|
||||
if (_sourceFile.GetText()[i] == '\n')
|
||||
{
|
||||
line++;
|
||||
column = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
column++;
|
||||
}
|
||||
}
|
||||
|
||||
return new SourceLocation(line, column);
|
||||
Diagnostic = diagnostic;
|
||||
}
|
||||
}
|
||||
@@ -6,7 +6,7 @@ out: .build/out.o
|
||||
.build/out.o: $(NUBC) src/main.nub src/raylib.nub
|
||||
$(NUBC) src/main.nub src/raylib.nub
|
||||
|
||||
# .PHONY: $(NUBC)
|
||||
.PHONY: $(NUBC)
|
||||
$(NUBC):
|
||||
dotnet build ../compiler/NubLang.CLI/NubLang.CLI.csproj
|
||||
|
||||
|
||||
Reference in New Issue
Block a user