This commit is contained in:
2026-02-08 00:05:59 +01:00
parent 3b75e62aa7
commit cb2411a7eb
11 changed files with 1042 additions and 806 deletions

View File

@@ -0,0 +1,10 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Build settings for the Compiler project. -->
<PropertyGroup>
<!-- Build an executable (console entry point) rather than a class library. -->
<OutputType>Exe</OutputType>
<!-- Target .NET 9. -->
<TargetFramework>net9.0</TargetFramework>
<!-- Let the SDK add its default global using directives. -->
<ImplicitUsings>enable</ImplicitUsings>
<!-- Turn on nullable reference type analysis for the whole project. -->
<Nullable>enable</Nullable>
</PropertyGroup>
</Project>

View File

@@ -0,0 +1,216 @@
using System.Globalization;
using System.Text;
namespace Compiler;
/// <summary>
/// Translates parsed definitions into C source text.
/// </summary>
/// <param name="nodes">
/// Top-level definitions to translate. Only <see cref="NodeDefinitionFunc"/> entries are emitted.
/// </param>
public sealed class Generator(List<NodeDefinition> nodes)
{
    /// <summary>
    /// Generates C source for <paramref name="nodes"/>.
    /// </summary>
    /// <param name="nodes">Definitions produced by the parser.</param>
    /// <returns>
    /// The generated text: the <c>struct string</c> prelude, one prototype per function,
    /// then one definition per function.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// A statement, expression or type node is of a kind (or bit width) this generator does not support.
    /// </exception>
    public static string Emit(List<NodeDefinition> nodes)
    {
        return new Generator(nodes).Emit();
    }

    private readonly IndentedTextWriter writer = new();

    private string Emit()
    {
        // The closing delimiter sits at the same column as the content, so the emitted
        // prelude lines carry no leading whitespace.
        writer.WriteLine("""
            struct string {
            const char *data;
            int length;
            };
            """);

        var functions = nodes.OfType<NodeDefinitionFunc>().ToList();

        // Prototypes first, so functions may call each other regardless of definition order.
        foreach (var function in functions)
        {
            writer.WriteLine($"{Signature(function)};");
        }

        writer.WriteLine();

        foreach (var function in functions)
        {
            writer.WriteLine(Signature(function));
            writer.WriteLine("{");
            using (writer.Indent())
            {
                EmitStatement(function.Body);
            }

            writer.WriteLine("}");
            writer.WriteLine();
        }

        return writer.ToString();
    }

    /// <summary>Builds <c>return-type name(parameters)</c>, shared by prototype and definition.</summary>
    private static string Signature(NodeDefinitionFunc function)
    {
        var parameters = function.Parameters.Select(x => $"{CType(x.Type)} {x.Name.Ident}");
        return $"{CType(function.ReturnType)} {function.Name.Ident}({string.Join(", ", parameters)})";
    }

    private void EmitStatement(NodeStatement node)
    {
        switch (node)
        {
            case NodeStatementBlock statement:
                EmitStatementBlock(statement);
                break;
            case NodeStatementFuncCall statement:
                EmitStatementFuncCall(statement);
                break;
            default:
                // Fail loudly instead of silently dropping a statement from the output.
                throw new ArgumentOutOfRangeException(nameof(node));
        }
    }

    private void EmitStatementBlock(NodeStatementBlock node)
    {
        writer.WriteLine("{");
        using (writer.Indent())
        {
            foreach (var statement in node.Statements)
            {
                EmitStatement(statement);
            }
        }

        writer.WriteLine("}");
    }

    private void EmitStatementFuncCall(NodeStatementFuncCall node)
    {
        var name = EmitExpression(node.Func);
        var parameterValues = node.Parameters.Select(EmitExpression).ToList();
        writer.WriteLine($"{name}({string.Join(", ", parameterValues)});");
    }

    private string EmitExpression(NodeExpression node)
    {
        return node switch
        {
            NodeExpressionBoolLiteral expression => EmitExpressionBoolLiteral(expression),
            NodeExpressionFloatLiteral expression => EmitExpressionFloatLiteral(expression),
            NodeExpressionIntLiteral expression => EmitExpressionIntLiteral(expression),
            NodeExpressionStringLiteral expression => EmitExpressionStringLiteral(expression),
            NodeExpressionIdent expression => EmitExpressionIdent(expression),
            _ => throw new ArgumentOutOfRangeException(nameof(node)),
        };
    }

    private string EmitExpressionBoolLiteral(NodeExpressionBoolLiteral expression)
    {
        return expression.Value.Value ? "1" : "0";
    }

    private string EmitExpressionFloatLiteral(NodeExpressionFloatLiteral expression)
    {
        var text = expression.Value.Value.ToString(CultureInfo.InvariantCulture);

        // An integer-valued decimal prints without a fraction; add one so the C constant
        // stays a floating-point constant instead of becoming an int.
        return text.Contains('.') ? text : text + ".0";
    }

    private string EmitExpressionIntLiteral(NodeExpressionIntLiteral expression)
    {
        // Invariant culture: generated source must not depend on the host's number format.
        return expression.Value.Value.ToString(CultureInfo.InvariantCulture);
    }

    /// <summary>
    /// Emits a <c>struct string</c> compound literal whose <c>data</c> is the UTF-8 encoding
    /// of the token value and whose <c>length</c> is that encoding's byte count.
    /// </summary>
    private string EmitExpressionStringLiteral(NodeExpressionStringLiteral expression)
    {
        var bytes = Encoding.UTF8.GetBytes(expression.Value.Value);
        var literal = new StringBuilder(bytes.Length);

        foreach (var b in bytes)
        {
            switch (b)
            {
                case (byte)'"':
                    literal.Append("\\\"");
                    break;
                case (byte)'\\':
                    literal.Append("\\\\");
                    break;
                case >= 0x20 and <= 0x7E:
                    literal.Append((char)b);
                    break;
                default:
                    // Three-digit octal escapes are fixed-width, so a following digit can
                    // never be absorbed into the escape (unlike \x escapes).
                    literal.Append('\\').Append(Convert.ToString(b, 8).PadLeft(3, '0'));
                    break;
            }
        }

        return $"(struct string){{ \"{literal}\", {bytes.Length} }}";
    }

    private string EmitExpressionIdent(NodeExpressionIdent expression)
    {
        return expression.Value.Ident;
    }

    /// <summary>Maps a type node to the C type spelling used in the generated code.</summary>
    /// <exception cref="ArgumentOutOfRangeException">The node kind or its bit width is not supported.</exception>
    private static string CType(NodeType node)
    {
        return node switch
        {
            NodeTypeCustom type => $"struct {type.Name.Ident}",
            NodeTypeFloat type => type.Width switch
            {
                32 => "float",
                64 => "double",
                _ => throw new ArgumentOutOfRangeException(nameof(node), $"Unsupported float width {type.Width}"),
            },
            NodeTypePointer type => $"{CType(type.To)}*",
            NodeTypeSInt type => type.Width switch
            {
                // C has no 'byte' type; 'long' is only 32 bits on LLP64 targets.
                8 => "signed char",
                16 => "short",
                32 => "int",
                64 => "long long",
                _ => throw new ArgumentOutOfRangeException(nameof(node), $"Unsupported signed integer width {type.Width}"),
            },
            NodeTypeUInt type => type.Width switch
            {
                8 => "unsigned char",
                16 => "unsigned short",
                32 => "unsigned int",
                64 => "unsigned long long",
                _ => throw new ArgumentOutOfRangeException(nameof(node), $"Unsupported unsigned integer width {type.Width}"),
            },
            NodeTypeString => "struct string",
            NubTypeVoid => "void",
            _ => throw new ArgumentOutOfRangeException(nameof(node)),
        };
    }
}
/// <summary>
/// Accumulates text in memory and prefixes each new line with one space per active indent level.
/// </summary>
internal class IndentedTextWriter
{
    private readonly StringBuilder _buffer = new();
    private int _depth;

    /// <summary>
    /// Raises the indent level by one.
    /// </summary>
    /// <returns>A handle that lowers the level again when disposed (only once per handle).</returns>
    public IDisposable Indent()
    {
        _depth++;
        return new IndentScope(this);
    }

    /// <summary>Writes <paramref name="text"/> followed by a line terminator, indenting if at the start of a line.</summary>
    public void WriteLine(string text)
    {
        WriteIndent();
        _buffer.AppendLine(text);
    }

    /// <summary>Writes <paramref name="text"/> without a line terminator, indenting if at the start of a line.</summary>
    public void Write(string text)
    {
        WriteIndent();
        _buffer.Append(text);
    }

    /// <summary>Writes a bare line terminator; no indentation is added.</summary>
    public void WriteLine() => _buffer.AppendLine();

    /// <summary>Returns everything written so far.</summary>
    public override string ToString() => _buffer.ToString();

    private void WriteIndent()
    {
        // Indent only when the buffer is empty or the previous character ended a line.
        var atLineStart = _buffer.Length == 0 || _buffer[^1] is '\n' or '\r';
        if (!atLineStart)
        {
            return;
        }

        for (var remaining = _depth; remaining > 0; remaining--)
        {
            _buffer.Append(" ");
        }
    }

    private class IndentScope(IndentedTextWriter owner) : IDisposable
    {
        private bool _released;

        public void Dispose()
        {
            if (!_released)
            {
                owner._depth--;
                _released = true;
            }
        }
    }
}

377
compiler/Compiler/Parser.cs Normal file
View File

@@ -0,0 +1,377 @@
using System.Diagnostics.CodeAnalysis;
namespace Compiler;
/// <summary>
/// Recursive-descent parser that turns a token list into top-level definitions.
/// </summary>
/// <param name="tokens">Tokens to parse, in source order.</param>
public sealed class Parser(List<Token> tokens)
{
    /// <summary>
    /// Parses <paramref name="tokens"/> into a list of definitions.
    /// </summary>
    /// <param name="tokens">Tokens to parse, in source order.</param>
    /// <returns>One node per top-level definition, in source order.</returns>
    /// <exception cref="Exception">The token stream does not form a valid program.</exception>
    public static List<NodeDefinition> Parse(List<Token> tokens)
    {
        return new Parser(tokens).Parse();
    }

    // Position of the next unconsumed token.
    private int index;

    private List<NodeDefinition> Parse()
    {
        var nodes = new List<NodeDefinition>();
        while (Peek() is not null)
        {
            nodes.Add(ParseDefinition());
        }

        return nodes;
    }

    /// <summary>Parses <c>func name(param: type, ...): returnType statement</c>.</summary>
    private NodeDefinition ParseDefinition()
    {
        var startIndex = index;

        if (!TryExpectKeyword(Keyword.Func))
        {
            throw new Exception("Not a valid definition");
        }

        var name = ExpectIdent();
        var parameters = new List<NodeDefinitionFunc.Param>();

        ExpectSymbol(Symbol.OpenParen);
        while (!TryExpectSymbol(Symbol.CloseParen))
        {
            var paramStartIndex = index;
            var parameterName = ExpectIdent();
            ExpectSymbol(Symbol.Colon);
            var parameterType = ParseType();
            parameters.Add(new NodeDefinitionFunc.Param(TokensFrom(paramStartIndex), parameterName, parameterType));

            // The tokenizer emits commas, so accept one as a separator. It stays optional so
            // input that parsed before this change (no separators) is still accepted, and it
            // is consumed after TokensFrom so it is not part of the parameter's token span.
            TryExpectSymbol(Symbol.Comma);
        }

        ExpectSymbol(Symbol.Colon);
        var returnType = ParseType();
        var body = ParseStatement();

        return new NodeDefinitionFunc(TokensFrom(startIndex), name, parameters, body, returnType);
    }

    /// <summary>Parses either a <c>{ ... }</c> block or a call statement <c>expr(arg, ...)</c>.</summary>
    private NodeStatement ParseStatement()
    {
        var startIndex = index;

        if (TryExpectSymbol(Symbol.OpenCurly))
        {
            var statements = new List<NodeStatement>();
            while (!TryExpectSymbol(Symbol.CloseCurly))
            {
                statements.Add(ParseStatement());
            }

            return new NodeStatementBlock(TokensFrom(startIndex), statements);
        }

        var expression = ParseExpression();
        var parameters = new List<NodeExpression>();

        ExpectSymbol(Symbol.OpenParen);
        while (!TryExpectSymbol(Symbol.CloseParen))
        {
            parameters.Add(ParseExpression());

            // Optional separator between arguments (see ParseDefinition).
            TryExpectSymbol(Symbol.Comma);
        }

        return new NodeStatementFuncCall(TokensFrom(startIndex), expression, parameters);
    }

    /// <summary>Parses a single literal or identifier expression.</summary>
    private NodeExpression ParseExpression()
    {
        var startIndex = index;

        if (TryExpect(out TokenIntLiteral? intLiteral))
        {
            return new NodeExpressionIntLiteral(TokensFrom(startIndex), intLiteral);
        }

        if (TryExpect(out TokenFloatLiteral? floatLiteral))
        {
            return new NodeExpressionFloatLiteral(TokensFrom(startIndex), floatLiteral);
        }

        if (TryExpect(out TokenStringLiteral? stringLiteral))
        {
            return new NodeExpressionStringLiteral(TokensFrom(startIndex), stringLiteral);
        }

        if (TryExpect(out TokenBoolLiteral? boolLiteral))
        {
            return new NodeExpressionBoolLiteral(TokensFrom(startIndex), boolLiteral);
        }

        if (TryExpect(out TokenIdent? ident))
        {
            return new NodeExpressionIdent(TokensFrom(startIndex), ident);
        }

        throw new Exception("Not a valid expression");
    }

    /// <summary>
    /// Parses a type: <c>^T</c> for a pointer, a built-in name (<c>void</c>, <c>string</c>,
    /// <c>i8</c>..<c>i64</c>, <c>u8</c>..<c>u64</c>, <c>f32</c>, <c>f64</c>), or any other
    /// identifier as a custom type.
    /// </summary>
    private NodeType ParseType()
    {
        var startIndex = index;

        if (TryExpectSymbol(Symbol.Caret))
        {
            var to = ParseType();
            return new NodeTypePointer(TokensFrom(startIndex), to);
        }

        if (!TryExpect(out TokenIdent? ident))
        {
            throw new Exception("Not a valid type");
        }

        var span = TokensFrom(startIndex);
        return ident.Ident switch
        {
            "void" => new NubTypeVoid(span),
            "string" => new NodeTypeString(span),
            "i8" => new NodeTypeSInt(span, 8),
            "i16" => new NodeTypeSInt(span, 16),
            "i32" => new NodeTypeSInt(span, 32),
            "i64" => new NodeTypeSInt(span, 64),
            "u8" => new NodeTypeUInt(span, 8),
            "u16" => new NodeTypeUInt(span, 16),
            "u32" => new NodeTypeUInt(span, 32),
            "u64" => new NodeTypeUInt(span, 64),
            "f32" => new NodeTypeFloat(span, 32),
            "f64" => new NodeTypeFloat(span, 64),
            _ => new NodeTypeCustom(span, ident),
        };
    }

    /// <summary>Copies the tokens consumed since <paramref name="startIndex"/>.</summary>
    private List<Token> TokensFrom(int startIndex)
    {
        return tokens.GetRange(startIndex, index - startIndex);
    }

    private bool TryExpectKeyword(Keyword keyword)
    {
        if (Peek() is TokenKeyword token && token.Keyword == keyword)
        {
            Consume();
            return true;
        }

        return false;
    }

    private void ExpectSymbol(Symbol symbol)
    {
        if (!TryExpectSymbol(symbol))
        {
            throw new Exception($"Expected symbol '{symbol}'");
        }
    }

    private bool TryExpectSymbol(Symbol symbol)
    {
        if (Peek() is TokenSymbol token && token.Symbol == symbol)
        {
            Consume();
            return true;
        }

        return false;
    }

    private TokenIdent ExpectIdent()
    {
        if (TryExpect(out TokenIdent? ident))
        {
            return ident;
        }

        throw new Exception("Expected ident");
    }

    /// <summary>
    /// Consumes the next token when it is of type <typeparamref name="T"/>.
    /// </summary>
    /// <returns><c>true</c> and the token when it matched; otherwise <c>false</c> with nothing consumed.</returns>
    private bool TryExpect<T>([NotNullWhen(true)] out T? token) where T : Token
    {
        if (Peek() is T match)
        {
            Consume();
            token = match;
            return true;
        }

        token = null;
        return false;
    }

    private Token Consume()
    {
        if (index >= tokens.Count)
        {
            throw new Exception("End of tokens");
        }

        return tokens[index++];
    }

    /// <summary>Returns the token <paramref name="offset"/> positions ahead, or <c>null</c> past the end.</summary>
    private Token? Peek(int offset = 0)
    {
        if (index + offset >= tokens.Count)
        {
            return null;
        }

        return tokens[index + offset];
    }
}
/// <summary>Base class of every syntax-tree node; keeps the token list it was constructed with.</summary>
public abstract class Node(List<Token> tokens)
{
/// <summary>The tokens passed to the constructor for this node.</summary>
public List<Token> Tokens = tokens;
}
/// <summary>Base class for top-level definitions.</summary>
public abstract class NodeDefinition(List<Token> tokens) : Node(tokens);
/// <summary>A function definition: name, parameters, body statement and return type.</summary>
public sealed class NodeDefinitionFunc(List<Token> tokens, TokenIdent name, List<NodeDefinitionFunc.Param> parameters, NodeStatement body, NodeType returnType)
: NodeDefinition(tokens)
{
/// <summary>Identifier token holding the function name.</summary>
public TokenIdent Name = name;
/// <summary>Declared parameters, in the order given to the constructor.</summary>
public List<Param> Parameters = parameters;
/// <summary>The statement forming the function body.</summary>
public NodeStatement Body = body;
/// <summary>The declared return type.</summary>
public NodeType ReturnType = returnType;
/// <summary>A single function parameter: a name and a type.</summary>
public sealed class Param(List<Token> tokens, TokenIdent name, NodeType type) : Node(tokens)
{
/// <summary>Identifier token holding the parameter name.</summary>
public TokenIdent Name = name;
/// <summary>The declared parameter type.</summary>
public NodeType Type = type;
}
}
/// <summary>Base class for statement nodes.</summary>
public abstract class NodeStatement(List<Token> tokens) : Node(tokens);
/// <summary>A block statement holding a list of child statements.</summary>
public sealed class NodeStatementBlock(List<Token> tokens, List<NodeStatement> statements) : NodeStatement(tokens)
{
/// <summary>The statements contained in the block.</summary>
public List<NodeStatement> Statements = statements;
}
/// <summary>A function-call statement: the called expression plus its argument expressions.</summary>
public sealed class NodeStatementFuncCall(List<Token> tokens, NodeExpression func, List<NodeExpression> parameters) : NodeStatement(tokens)
{
/// <summary>The expression being called.</summary>
public NodeExpression Func = func;
/// <summary>The argument expressions passed to the call.</summary>
public List<NodeExpression> Parameters = parameters;
}
/// <summary>Base class for expression nodes.</summary>
public abstract class NodeExpression(List<Token> tokens) : Node(tokens);
/// <summary>An integer literal expression wrapping its literal token.</summary>
public sealed class NodeExpressionIntLiteral(List<Token> tokens, TokenIntLiteral value) : NodeExpression(tokens)
{
/// <summary>The integer literal token.</summary>
public TokenIntLiteral Value = value;
}
/// <summary>A floating-point literal expression wrapping its literal token.</summary>
public sealed class NodeExpressionFloatLiteral(List<Token> tokens, TokenFloatLiteral value) : NodeExpression(tokens)
{
/// <summary>The float literal token.</summary>
public TokenFloatLiteral Value = value;
}
/// <summary>A string literal expression wrapping its literal token.</summary>
public sealed class NodeExpressionStringLiteral(List<Token> tokens, TokenStringLiteral value) : NodeExpression(tokens)
{
/// <summary>The string literal token.</summary>
public TokenStringLiteral Value = value;
}
/// <summary>A boolean literal expression wrapping its literal token.</summary>
public sealed class NodeExpressionBoolLiteral(List<Token> tokens, TokenBoolLiteral value) : NodeExpression(tokens)
{
/// <summary>The boolean literal token.</summary>
public TokenBoolLiteral Value = value;
}
/// <summary>An identifier used as an expression.</summary>
public sealed class NodeExpressionIdent(List<Token> tokens, TokenIdent value) : NodeExpression(tokens)
{
/// <summary>The identifier token.</summary>
public TokenIdent Value = value;
}
/// <summary>Base class for type nodes.</summary>
public abstract class NodeType(List<Token> tokens) : Node(tokens);
/// <summary>The <c>void</c> type.</summary>
// NOTE(review): the "Nub" prefix differs from the sibling NodeType* classes - confirm whether that is intentional.
public sealed class NubTypeVoid(List<Token> tokens) : NodeType(tokens);
/// <summary>An unsigned integer type of a given bit width.</summary>
public sealed class NodeTypeUInt(List<Token> tokens, int width) : NodeType(tokens)
{
/// <summary>Width in bits; the parser in this file creates 8, 16, 32 and 64.</summary>
public int Width = width;
}
/// <summary>A signed integer type of a given bit width.</summary>
public sealed class NodeTypeSInt(List<Token> tokens, int width) : NodeType(tokens)
{
/// <summary>Width in bits; the parser in this file creates 8, 16, 32 and 64.</summary>
public int Width = width;
}
/// <summary>A floating-point type of a given bit width.</summary>
public sealed class NodeTypeFloat(List<Token> tokens, int width) : NodeType(tokens)
{
/// <summary>Width in bits; the parser in this file creates 32 and 64.</summary>
public int Width = width;
}
/// <summary>The built-in <c>string</c> type.</summary>
public sealed class NodeTypeString(List<Token> tokens) : NodeType(tokens);
/// <summary>A type referred to by a name that is not one of the built-in type names.</summary>
public sealed class NodeTypeCustom(List<Token> tokens, TokenIdent name) : NodeType(tokens)
{
/// <summary>Identifier token holding the type name.</summary>
public TokenIdent Name = name;
}
/// <summary>A pointer to another type.</summary>
public sealed class NodeTypePointer(List<Token> tokens, NodeType to) : NodeType(tokens)
{
/// <summary>The pointed-to type.</summary>
public NodeType To = to;
}

View File

@@ -0,0 +1,16 @@
using Compiler;
// Hard-coded sample program used as the compiler's input.
const string source = """
func main(): void {
do_something("test")
}
func do_something(text: string): void {
}
""";

// Run the pipeline (tokenize -> parse -> emit) and print the generated text to stdout.
Console.WriteLine(Generator.Emit(Parser.Parse(Tokenizer.Tokenize(source))));

View File

@@ -0,0 +1,410 @@
using System.Numerics;
using System.Text;
namespace Compiler;
/// <summary>
/// Splits source text into tokens: integer literals (decimal, <c>0x</c> hex, <c>0b</c> binary,
/// with <c>_</c> digit separators), string literals, symbols, keywords, boolean literals
/// and identifiers. Whitespace between tokens is skipped.
/// </summary>
/// <remarks>
/// NOTE(review): nothing here produces <see cref="TokenFloatLiteral"/>; input such as
/// <c>1.5</c> fails on the <c>.</c> character. Confirm whether float literals are planned.
/// </remarks>
/// <param name="contents">The source text to tokenize.</param>
public sealed class Tokenizer(string contents)
{
    /// <summary>
    /// Tokenizes <paramref name="contents"/>.
    /// </summary>
    /// <param name="contents">The source text to tokenize.</param>
    /// <returns>All tokens in source order.</returns>
    /// <exception cref="Exception">
    /// An unexpected character, an unterminated string literal, or a <c>0x</c>/<c>0b</c>
    /// prefix without digits was found.
    /// </exception>
    public static List<Token> Tokenize(string contents)
    {
        return new Tokenizer(contents).Tokenize();
    }

    private int index;
    private int line = 1;
    private int column = 1;

    private List<Token> Tokenize()
    {
        var tokens = new List<Token>();
        while (TryPeek(out var c))
        {
            if (char.IsWhiteSpace(c))
            {
                Consume();
                continue;
            }

            tokens.Add(ParseToken());
        }

        return tokens;
    }

    private Token ParseToken()
    {
        var c = Peek()!.Value;

        // ASCII digits only: the value computation below is only valid for '0'..'9'.
        if (char.IsAsciiDigit(c))
        {
            return ParseIntLiteral(c);
        }

        var followedByEqual = Peek(1) is '=';
        return c switch
        {
            '"' => ParseStringLiteral(),
            '{' => ConsumeSymbol(Symbol.OpenCurly),
            '}' => ConsumeSymbol(Symbol.CloseCurly),
            '(' => ConsumeSymbol(Symbol.OpenParen),
            ')' => ConsumeSymbol(Symbol.CloseParen),
            ',' => ConsumeSymbol(Symbol.Comma),
            ':' => ConsumeSymbol(Symbol.Colon),
            '^' => ConsumeSymbol(Symbol.Caret),
            '!' when followedByEqual => ConsumeSymbol(Symbol.BangEqual, 2),
            '!' => ConsumeSymbol(Symbol.Bang),
            '=' when followedByEqual => ConsumeSymbol(Symbol.EqualEqual, 2),
            '=' => ConsumeSymbol(Symbol.Equal),
            '<' when followedByEqual => ConsumeSymbol(Symbol.LessThanEqual, 2),
            '<' => ConsumeSymbol(Symbol.LessThan),
            '>' when followedByEqual => ConsumeSymbol(Symbol.GreaterThanEqual, 2),
            '>' => ConsumeSymbol(Symbol.GreaterThan),
            '+' when followedByEqual => ConsumeSymbol(Symbol.PlusEqual, 2),
            '+' => ConsumeSymbol(Symbol.Plus),
            '-' when followedByEqual => ConsumeSymbol(Symbol.MinusEqual, 2),
            '-' => ConsumeSymbol(Symbol.Minus),
            '*' when followedByEqual => ConsumeSymbol(Symbol.StarEqual, 2),
            '*' => ConsumeSymbol(Symbol.Star),
            '/' when followedByEqual => ConsumeSymbol(Symbol.ForwardSlashEqual, 2),
            '/' => ConsumeSymbol(Symbol.ForwardSlash),
            _ when char.IsLetter(c) || c == '_' => ParseWord(),
            _ => throw new Exception($"Unexpected character '{c}'"),
        };
    }

    /// <summary>Parses a decimal, <c>0x</c> hexadecimal or <c>0b</c> binary integer literal.</summary>
    private Token ParseIntLiteral(char first)
    {
        var (startIndex, startLine, startColumn) = (index, line, column);

        BigInteger value;
        if (first == '0' && Peek(1) is 'x')
        {
            Consume();
            Consume();
            value = ParseDigits(16, "hexadecimal");
        }
        else if (first == '0' && Peek(1) is 'b')
        {
            Consume();
            Consume();
            value = ParseDigits(2, "binary");
        }
        else
        {
            value = ParseDigits(10, "decimal");
        }

        return new TokenIntLiteral(startLine, startColumn, index - startIndex, value);
    }

    /// <summary>
    /// Consumes digits of the given radix (skipping <c>_</c> separators) and returns their value.
    /// Stops at the first character that is not a digit of that radix.
    /// </summary>
    private BigInteger ParseDigits(int radix, string kind)
    {
        var value = BigInteger.Zero;
        var digitCount = 0;

        while (TryPeek(out var c))
        {
            if (c == '_')
            {
                Consume();
                continue;
            }

            var digit = DigitValue(c);
            if (digit < 0 || digit >= radix)
            {
                break;
            }

            Consume();
            value = value * radix + digit;
            digitCount++;
        }

        // A bare "0x" or "0b" is malformed; do not silently read it as zero.
        if (digitCount == 0)
        {
            throw new Exception($"Expected at least one {kind} digit at line {line}, column {column}");
        }

        return value;
    }

    /// <summary>Numeric value of an ASCII hex digit, or -1 when the character is not one.</summary>
    private static int DigitValue(char c)
    {
        return c switch
        {
            >= '0' and <= '9' => c - '0',
            >= 'a' and <= 'f' => c - 'a' + 10,
            >= 'A' and <= 'F' => c - 'A' + 10,
            _ => -1,
        };
    }

    /// <summary>Parses a double-quoted string literal; no escape sequences are interpreted.</summary>
    private Token ParseStringLiteral()
    {
        // Start position is captured up front because the literal may contain newlines,
        // which reset the column counter and advance the line counter.
        var (startIndex, startLine, startColumn) = (index, line, column);

        Consume(); // opening quote
        var buf = new StringBuilder();
        while (TryPeek(out var c) && c != '"')
        {
            buf.Append(Consume());
        }

        if (!TryPeek(out _))
        {
            throw new Exception($"Unterminated string literal starting at line {startLine}, column {startColumn}");
        }

        Consume(); // closing quote
        return new TokenStringLiteral(startLine, startColumn, index - startIndex, buf.ToString());
    }

    /// <summary>Parses a run of letters, digits and underscores into a keyword, bool literal or identifier.</summary>
    private Token ParseWord()
    {
        var (startIndex, startLine, startColumn) = (index, line, column);

        var buf = new StringBuilder();
        while (TryPeek(out var c) && (char.IsLetterOrDigit(c) || c == '_'))
        {
            buf.Append(Consume());
        }

        var value = buf.ToString();
        var length = index - startIndex;
        return value switch
        {
            "func" => new TokenKeyword(startLine, startColumn, length, Keyword.Func),
            "let" => new TokenKeyword(startLine, startColumn, length, Keyword.Let),
            "if" => new TokenKeyword(startLine, startColumn, length, Keyword.If),
            "true" => new TokenBoolLiteral(startLine, startColumn, length, true),
            "false" => new TokenBoolLiteral(startLine, startColumn, length, false),
            _ => new TokenIdent(startLine, startColumn, length, value),
        };
    }

    /// <summary>Consumes <paramref name="length"/> characters and wraps them as a symbol token.</summary>
    private Token ConsumeSymbol(Symbol symbol, int length = 1)
    {
        var (startLine, startColumn) = (line, column);
        for (var i = 0; i < length; i++)
        {
            Consume();
        }

        return new TokenSymbol(startLine, startColumn, length, symbol);
    }

    /// <summary>Returns the current character and advances, updating the line/column counters.</summary>
    private char Consume()
    {
        if (index >= contents.Length)
        {
            throw new Exception("End of tokens");
        }

        var c = contents[index];
        if (c == '\n')
        {
            line += 1;
            column = 1;
        }
        else
        {
            column += 1;
        }

        index += 1;
        return c;
    }

    /// <summary>Returns the character <paramref name="offset"/> positions ahead, or <c>null</c> past the end.</summary>
    private char? Peek(int offset = 0)
    {
        if (index + offset >= contents.Length)
        {
            return null;
        }

        return contents[index + offset];
    }

    private bool TryPeek(out char c)
    {
        if (index >= contents.Length)
        {
            c = '\0';
            return false;
        }

        c = contents[index];
        return true;
    }
}
/// <summary>Base class of all tokens; records where the token was found and how long it is.</summary>
public abstract class Token(int line, int column, int length)
{
/// <summary>Line number given at construction (the tokenizer in this file starts counting at 1).</summary>
public int Line = line;
/// <summary>Column number given at construction (the tokenizer in this file starts counting at 1).</summary>
public int Column = column;
/// <summary>Length of the token as given at construction.</summary>
public int Length = length;
}
/// <summary>An identifier token.</summary>
public sealed class TokenIdent(int line, int column, int length, string ident) : Token(line, column, length)
{
/// <summary>The identifier text.</summary>
public string Ident = ident;
}
/// <summary>An integer literal token.</summary>
public sealed class TokenIntLiteral(int line, int column, int length, BigInteger value) : Token(line, column, length)
{
/// <summary>The literal's value as an arbitrary-precision integer.</summary>
public BigInteger Value = value;
}
/// <summary>A floating-point literal token.</summary>
// NOTE(review): no code in the visible tokenizer constructs this token - confirm where it is produced.
public sealed class TokenFloatLiteral(int line, int column, int length, decimal value) : Token(line, column, length)
{
/// <summary>The literal's value.</summary>
public decimal Value = value;
}
/// <summary>A string literal token.</summary>
public sealed class TokenStringLiteral(int line, int column, int length, string value) : Token(line, column, length)
{
/// <summary>The text between the quotes, as the tokenizer in this file builds it (no escape processing).</summary>
public string Value = value;
}
/// <summary>A boolean literal token (<c>true</c> or <c>false</c>).</summary>
public sealed class TokenBoolLiteral(int line, int column, int length, bool value) : Token(line, column, length)
{
/// <summary>The literal's value.</summary>
public bool Value = value;
}
/// <summary>Punctuation and operator symbols; the trailing comments give the source text the tokenizer in this file maps to each.</summary>
public enum Symbol
{
OpenCurly, // {
CloseCurly, // }
OpenParen, // (
CloseParen, // )
Comma, // ,
Colon, // :
Caret, // ^
Bang, // !
Equal, // =
EqualEqual, // ==
BangEqual, // !=
LessThan, // <
LessThanEqual, // <=
GreaterThan, // >
GreaterThanEqual, // >=
Plus, // +
PlusEqual, // +=
Minus, // -
MinusEqual, // -=
Star, // *
StarEqual, // *=
ForwardSlash, // /
ForwardSlashEqual, // /=
}
/// <summary>A token carrying one <see cref="Symbol"/> value.</summary>
public sealed class TokenSymbol(int line, int column, int length, Symbol symbol) : Token(line, column, length)
{
/// <summary>Which symbol this token represents.</summary>
public Symbol Symbol = symbol;
}
/// <summary>Reserved words; the trailing comments give the source text the tokenizer in this file maps to each.</summary>
public enum Keyword
{
Func, // func
Let, // let
If, // if
}
/// <summary>A token carrying one <see cref="Keyword"/> value.</summary>
public sealed class TokenKeyword(int line, int column, int length, Keyword keyword) : Token(line, column, length)
{
/// <summary>Which keyword this token represents.</summary>
public Keyword Keyword = keyword;
}