simple compiler (most intermediate language generation missing)

This commit is contained in:
Thomas Lindner 2023-01-04 23:32:55 +01:00
commit 529e491ab6
13 changed files with 481 additions and 0 deletions

205
.clang-format Normal file
View file

@ -0,0 +1,205 @@
---
Language: Cpp
# BasedOnStyle: Google
AccessModifierOffset: -1
AlignAfterOpenBracket: Align
AlignArrayOfStructures: None
AlignConsecutiveMacros: None
AlignConsecutiveAssignments: None
AlignConsecutiveBitFields: None
AlignConsecutiveDeclarations: None
AlignEscapedNewlines: Left
AlignOperands: Align
AlignTrailingComments: true
AllowAllArgumentsOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortEnumsOnASingleLine: false
AllowShortBlocksOnASingleLine: Never
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Empty
AllowShortLambdasOnASingleLine: All
AllowShortIfStatementsOnASingleLine: Never
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: Yes
AttributeMacros:
- __capability
BinPackArguments: true
BinPackParameters: true
BraceWrapping:
AfterCaseLabel: false
AfterClass: false
AfterControlStatement: Never
AfterEnum: false
AfterFunction: false
AfterNamespace: false
AfterObjCDeclaration: false
AfterStruct: false
AfterUnion: false
AfterExternBlock: false
BeforeCatch: false
BeforeElse: false
BeforeLambdaBody: false
BeforeWhile: false
IndentBraces: false
SplitEmptyFunction: true
SplitEmptyRecord: true
SplitEmptyNamespace: true
BreakBeforeBinaryOperators: None
BreakBeforeConceptDeclarations: true
BreakBeforeBraces: Attach
BreakBeforeInheritanceComma: false
BreakInheritanceList: BeforeColon
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakConstructorInitializers: BeforeColon
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true
ColumnLimit: 80
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DeriveLineEnding: true
DerivePointerAlignment: true
DisableFormat: false
EmptyLineAfterAccessModifier: Never
EmptyLineBeforeAccessModifier: LogicalBlock
ExperimentalAutoDetectBinPacking: false
BasedOnStyle: ''
ConstructorInitializerAllOnOneLineOrOnePerLine: true
AllowAllConstructorInitializersOnNextLine: true
FixNamespaceComments: true
ForEachMacros:
- foreach
- Q_FOREACH
- BOOST_FOREACH
IfMacros:
- KJ_IF_MAYBE
# Include handling: include blocks are regrouped using the categories below.
# NOTE(review): clang-format is documented to assign an include the priority
# of the first regex it matches, so the order of the entries matters — confirm
# against the clang-format version in use.
IncludeBlocks: Regroup
IncludeCategories:
# Angle-bracket headers starting with "xlang" and ending in ".h": priority 3.
- Regex: '^<xlang.*\.h>$'
Priority: 3
# Angle-bracket headers under sys/ ending in ".h": priority 1.
- Regex: '^<sys/.*\.h>$'
Priority: 1
# Any other angle-bracket header ending in ".h": priority 2.
- Regex: '^<.*\.h>$'
Priority: 2
# Catch-all for everything else (e.g. extensionless or ".hh" headers):
# priority 3, the same priority as the xlang headers above.
- Regex: '.*'
Priority: 3
IncludeIsMainRegex: '([-_](test|unittest))?$'
IncludeIsMainSourceRegex: ''
IndentAccessModifiers: false
IndentCaseLabels: true
IndentCaseBlocks: false
IndentGotoLabels: true
IndentPPDirectives: None
IndentExternBlock: AfterExternBlock
IndentRequires: false
IndentWidth: 2
IndentWrappedFunctionNames: false
InsertTrailingCommas: None
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: false
LambdaBodyIndentation: Signature
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBinPackProtocolList: Never
ObjCBlockIndentWidth: 2
ObjCBreakBeforeNestedBlockParam: true
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: true
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PenaltyIndentedWhitespace: 0
PointerAlignment: Left
PPIndentWidth: -1
RawStringFormats:
- Language: Cpp
Delimiters:
- cc
- CC
- cpp
- Cpp
- CPP
- 'c++'
- 'C++'
CanonicalDelimiter: ''
BasedOnStyle: google
- Language: TextProto
Delimiters:
- pb
- PB
- proto
- PROTO
EnclosingFunctions:
- EqualsProto
- EquivToProto
- PARSE_PARTIAL_TEXT_PROTO
- PARSE_TEST_PROTO
- PARSE_TEXT_PROTO
- ParseTextOrDie
- ParseTextProtoOrDie
- ParseTestProto
- ParsePartialTestProto
CanonicalDelimiter: pb
BasedOnStyle: google
ReferenceAlignment: Pointer
ReflowComments: true
ShortNamespaceLines: 1
SortIncludes: CaseSensitive
SortJavaStaticImport: Before
SortUsingDeclarations: true
SpaceAfterCStyleCast: false
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeCaseColon: false
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceAroundPointerQualifiers: Default
SpaceBeforeRangeBasedForLoopColon: true
SpaceInEmptyBlock: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: Never
SpacesInConditionalStatement: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInLineCommentPrefix:
Minimum: 1
Maximum: -1
SpacesInParentheses: false
SpacesInSquareBrackets: false
SpaceBeforeSquareBrackets: false
BitFieldColonSpacing: Both
Standard: Auto
StatementAttributeLikeMacros:
- Q_EMIT
StatementMacros:
- Q_UNUSED
- QT_REQUIRE_VERSION
TabWidth: 8
UseCRLF: false
UseTab: Never
WhitespaceSensitiveMacros:
- STRINGIZE
- PP_STRINGIZE
- BOOST_PP_STRINGIZE
- NS_SWIFT_NAME
- CF_SWIFT_NAME
...

1
.clang-format-include Normal file
View file

@ -0,0 +1 @@
bootstrap/**/*

10
.editorconfig Normal file
View file

@ -0,0 +1,10 @@
# Editor settings for the repository.
# root = true marks this as the top-most EditorConfig file.
root = true
# The settings below apply to every file.
[*]
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true
charset = utf-8
# Two-space indentation and an 80-column limit, the same values as
# IndentWidth and ColumnLimit in .clang-format.
indent_style = space
indent_size = 2
max_line_length = 80

38
bootstrap/emit.cc Normal file
View file

@ -0,0 +1,38 @@
#include <emit.hh>
#include <iostream>
#include <string>
namespace xlang {
/// Opens `outputfile` for writing; everything the listener emits goes there.
///
/// The path is copied into a std::string before it is handed to the stream:
/// std::ofstream is only required to be constructible from `const char*`,
/// `const std::string&` and `std::filesystem::path`, and a std::string_view
/// is not guaranteed to be null-terminated, so passing the view directly is
/// not portable across standard library implementations.
///
/// NOTE(review): a failed open is not reported here; later writes would then
/// be dropped silently.
EmitListener::EmitListener(std::string_view outputfile)
    : output{std::string{outputfile}} {}
void EmitListener::enterFile(xlangParser::FileContext *ctx) {
output << "data $printformat = { b \"%ld\\n\", b 0 }" << std::endl;
(void)ctx;
}
/// Emits the header of an exported function returning `w`, named after the
/// function's identifier token, followed by the opening brace and the
/// `@start` block label. A blank line is written first to separate it from
/// whatever was emitted before.
void EmitListener::enterFunction(xlangParser::FunctionContext *ctx) {
  output << std::endl;

  const auto functionName = ctx->Identifier()->getSymbol()->getText();
  output << "export function w $" << functionName << "()" << std::endl;
  output << "{" << std::endl;
  output << "@start" << std::endl;
}
void EmitListener::exitFunction(xlangParser::FunctionContext *ctx) {
output << " ret 0" << std::endl << "}" << std::endl;
(void)ctx;
}
/// Emits code for a finished statement. Only `print` statements produce
/// output: a variadic call to printf with `$printformat` and the temporary
/// `%v`. Every other kind of statement is ignored.
void EmitListener::exitStatement(xlangParser::StatementContext *ctx) {
  const bool isPrint = ctx->Print() != nullptr;
  if (!isPrint) {
    return;
  }
  output << " call $printf(l $printformat, ..., w %v)" << std::endl;
}
/// Emits code for a finished factor. Only integer literals are handled: the
/// literal's text is copied into the temporary `%v`. Identifiers, calls and
/// parenthesised expressions emit nothing.
void EmitListener::exitFactor(xlangParser::FactorContext *ctx) {
  auto *literal = ctx->Integer();
  if (literal == nullptr) {
    return;
  }
  output << " %v = w copy " << literal->getSymbol()->getText() << std::endl;
}
} // namespace xlang

22
bootstrap/emit.hh Normal file
View file

@ -0,0 +1,22 @@
#pragma once
#include <fstream>
#include <string_view>
#include <xlangBaseListener.h>
namespace xlang {

/// Parse-tree listener that writes intermediate-language text for an xlang
/// program to an output file while the tree is being walked.
///
/// Only a subset of the grammar's rules is overridden; all other callbacks
/// keep the no-op behaviour inherited from xlangBaseListener.
class EmitListener : public xlangBaseListener {
  /// Destination stream for the generated text.
  std::ofstream output;

 public:
  /// Opens `outputfile` for writing. Marked `explicit` so that a path is
  /// never converted into a listener implicitly.
  explicit EmitListener(std::string_view outputfile);

  /// Emits the file prologue (the `$printformat` data definition).
  void enterFile(xlangParser::FileContext *ctx) override;
  /// Emits the function header, opening brace and `@start` label.
  void enterFunction(xlangParser::FunctionContext *ctx) override;
  /// Emits `ret 0` and the closing brace of the function.
  void exitFunction(xlangParser::FunctionContext *ctx) override;
  /// Emits the printf call for `print` statements; ignores other statements.
  void exitStatement(xlangParser::StatementContext *ctx) override;
  /// Emits a copy of an integer literal into `%v`; ignores other factors.
  void exitFactor(xlangParser::FactorContext *ctx) override;
};

}  // namespace xlang

49
bootstrap/main.cc Normal file
View file

@ -0,0 +1,49 @@
#include <stdlib.h>
#include <unistd.h>
#include <emit.hh>
#include <iostream>
#include <string>
#include <xlangLexer.h>
#include <xlangParser.h>
/// Prints the command-line synopsis to stderr and terminates the process
/// with exit status 1. Marked [[noreturn]] so the compiler knows control
/// never comes back to the caller.
[[noreturn]] static void usage() {
  std::cerr << getprogname() << " [-o output.ssa] input.x" << std::endl;
  exit(1);
}
int main(int argc, char **argv) {
int opt;
std::string inputfile;
std::string outputfile;
while ((opt = getopt(argc, argv, "o:")) != -1) {
switch (opt) {
case 'o':
outputfile = optarg;
break;
default:
usage();
}
}
argc -= optind;
argv += optind;
if (argc != 1) {
usage();
}
inputfile = argv[0];
if (!outputfile.length()) {
outputfile = "/dev/stdout";
}
std::ifstream inputstream{inputfile};
antlr4::ANTLRInputStream input{inputstream};
xlang::xlangLexer lexer{&input};
antlr4::CommonTokenStream tokens{&lexer};
xlang::xlangParser parser{&tokens};
auto *tree = parser.file();
xlang::EmitListener emit{outputfile};
antlr4::tree::ParseTreeWalker::DEFAULT.walk(&emit, tree);
return 0;
}

13
bootstrap/meson.build Normal file
View file

@ -0,0 +1,13 @@
# The xc compiler executable: the driver (main.cc), the emitter (emit.cc) and
# the sources that the antlr4 generator produces from the grammar xlang.g4.
# It links against the ANTLR4 runtime and is installed.
xc_exe = executable('xc',
sources : [
'main.cc',
'emit.cc',
antlr4.process('xlang.g4'),
],
dependencies : [
antlr4_runtime_dep,
],
install : true)
# Generator wrapping the freshly built compiler: runs
# `xc -o @OUTPUT@ @INPUT@` and names the result @BASENAME@.ssa.
xc = generator(xc_exe,
arguments : ['-o', '@OUTPUT@', '@INPUT@'],
output : '@BASENAME@.ssa')

52
bootstrap/xlang.g4 Normal file
View file

@ -0,0 +1,52 @@
// Grammar of the xlang language (parser rules).
grammar xlang;
// A file is one or more function definitions.
file : function+ EOF;
// A function has a name, an optional list of parameter names and a body.
function : Identifier LeftParen argumentList? RightParen block;
// Comma-separated parameter names.
argumentList : Identifier (Comma Identifier)*;
// A brace-delimited sequence of statements (possibly empty).
block : LeftBrace statement* RightBrace;
// Statements: assignment, if/else, while, return, print and a bare
// expression. The conditions of if and while are not parenthesised.
statement : Identifier Assign expr Semicolon
| If expr block (Else block)?
| While expr block
| Return expr Semicolon
| Print expr Semicolon
| expr Semicolon
;
// Expressions are layered expr -> sum -> term -> factor, so comparisons bind
// loosest, then + and -, then * and /. Each level accepts a chain of
// operators of that level.
expr : sum ((Less|LessEqual|Greater|GreaterEqual|Equal|NotEqual) sum)*;
sum : term ((Plus|Minus) term)*;
term : factor ((Mul|Div) factor)*;
// Operands: integer literal, variable, function call, or a parenthesised
// expression.
factor : Integer
| Identifier
| Identifier LeftParen exprList? RightParen
| LeftParen expr RightParen
;
// Comma-separated call arguments.
exprList : expr (Comma expr)*;
// Lexer rules.

// Keywords. They are listed before Identifier so that a keyword spelling is
// tokenised as the keyword rather than as an identifier.
If : 'if';
Else : 'else';
While : 'while';
Return : 'return';
Print : 'print';

// Punctuation.
LeftParen : '(';
RightParen : ')';
LeftBrace : '{';
RightBrace : '}';
Assign : '=';

// Comparison operators.
Less : '<';
LessEqual : '<=';
Greater : '>';
GreaterEqual : '>=';
Equal : '==';
NotEqual : '!=';

// Arithmetic operators.
Plus : '+';
Minus : '-';
Mul : '*';
Div : '/';

Comma : ',';
Semicolon : ';';

// A letter followed by letters and digits.
Identifier : [a-zA-Z][a-zA-Z0-9]*;
// Unsigned decimal integer literal.
Integer : [0-9]+;

// Line comment: runs up to, but not including, the end of the line. The
// line terminator is left to the Whitespace rule, so a comment on the last
// line of a file that has no trailing newline is still recognised.
Comment : '//' ~[\r\n]* -> skip;
Whitespace : [ \t\r\n]+ -> skip;

40
meson.build Normal file
View file

@ -0,0 +1,40 @@
# Top-level build definition for xlang.
# Defaults: debugoptimized build, warning level 3, C++20.
# NOTE(review): 'c' is presumably enabled so that the .S files produced by
# the qbe generator below can be assembled — confirm.
project('xlang', ['cpp', 'c'],
version : '0.1',
license : 'ISC',
default_options : ['buildtype=debugoptimized',
'warning_level=3',
'cpp_std=c++20'])
# ANTLR4 C++ runtime dependency and the antlr4 tool itself.
antlr4_runtime_dep = dependency('antlr4-runtime', modules : ['antlr4_shared'])
antlr4_exe = find_program('antlr4')
# Generator that runs antlr4 on a grammar file: C++ target, code placed in a
# package named after the project ('xlang'), visitor generation enabled, and
# all files written directly into the build directory. The output list names
# the lexer, parser, visitor and listener files (header and source each)
# expected for a grammar called @BASENAME@.
antlr4 = generator(antlr4_exe,
arguments : [
'-o', '@BUILD_DIR@',
'-package', meson.project_name(),
'-visitor',
'-Dlanguage=Cpp',
'-Xexact-output-dir',
'@INPUT@',
],
output : [
'@BASENAME@Lexer.h',
'@BASENAME@Lexer.cpp',
'@BASENAME@Parser.h',
'@BASENAME@Parser.cpp',
'@BASENAME@Visitor.h',
'@BASENAME@Visitor.cpp',
'@BASENAME@BaseVisitor.h',
'@BASENAME@BaseVisitor.cpp',
'@BASENAME@Listener.h',
'@BASENAME@Listener.cpp',
'@BASENAME@BaseListener.h',
'@BASENAME@BaseListener.cpp',
])
# Generator that runs `qbe -o @OUTPUT@ @INPUT@` and names the result
# @BASENAME@.S.
qbe_exe = find_program('qbe')
qbe = generator(qbe_exe,
arguments : ['-o', '@OUTPUT@', '@INPUT@'],
output : '@BASENAME@.S')
# bootstrap/ defines the xc compiler and its generator; test/ uses both the
# xc and qbe generators, so it has to come after bootstrap/.
subdir('bootstrap')
subdir('test')

3
test/42.x Normal file
View file

@ -0,0 +1,3 @@
// Smallest test program: main prints the integer literal 42.
main() {
print 42;
}

18
test/fib.x Normal file
View file

@ -0,0 +1,18 @@
// Test program exercising a function call, a while loop and assignments.
main() {
// print fib(5); with fib(0) = fib(1) = 1 as defined below, this is 8
print fib(5);
return 0;
}
// fib(n): starts from x0 = x1 = 1, advances the pair (x0, x1) to
// (x1, x0 + x1) n times and returns x0.
// This gives fib(0) = 1, fib(1) = 1, fib(2) = 2, fib(3) = 3, fib(4) = 5,
// fib(5) = 8.
fib(n) {
x0 = 1;
x1 = 1;
i = 0;
while i < n {
// t holds the next number so that x0 can be overwritten first
t = x0 + x1;
x0 = x1;
x1 = t;
i = i + 1;
}
return x0;
}

9
test/hello.ssa Normal file
View file

@ -0,0 +1,9 @@
# Hand-written QBE IL test input: prints "hello world" through puts.
# Define the string constant.
data $str = { b "hello world", b 0 }
# Exported entry point returning a word.
export function w $main() {
@start
# Call the puts function with $str as argument.
# The result is stored in %r and not used afterwards.
%r = w call $puts(l $str)
ret 0
}

21
test/meson.build Normal file
View file

@ -0,0 +1,21 @@
# Test programs. Each executable is linked statically.

# hello: the hand-written hello.ssa, run through the qbe generator.
executable('hello',
sources : qbe.process('hello.ssa'),
link_args : ['-static'])
# XXX why is this needed?
# (Copies the xc generator output for 42.x to a .ssa file with cp.)
custom_target('42.ssa',
command : ['cp', '@INPUT@', '@OUTPUT@'],
input : xc.process('42.x'),
output : '@BASENAME@.ssa')
# 42: 42.x run through the xc generator, then through the qbe generator.
executable('42',
sources : qbe.process(xc.process('42.x')),
link_args : ['-static'])
# XXX why is this needed?
# (Copies the xc generator output for fib.x to a .ssa file with cp.)
custom_target('fib.ssa',
command : ['cp', '@INPUT@', '@OUTPUT@'],
input : xc.process('fib.x'),
output : '@BASENAME@.ssa')
# fib: fib.x run through the xc generator, then through the qbe generator.
executable('fib',
sources : qbe.process(xc.process('fib.x')),
link_args : ['-static'])