From 529e491ab651a03976cb51366f89e0f0df3a5e6e Mon Sep 17 00:00:00 2001 From: Thomas Lindner Date: Wed, 4 Jan 2023 23:32:55 +0100 Subject: [PATCH] simple compiler (most intermediate language generation missing) --- .clang-format | 205 ++++++++++++++++++++++++++++++++++++++++++ .clang-format-include | 1 + .editorconfig | 10 +++ bootstrap/emit.cc | 38 ++++++++ bootstrap/emit.hh | 22 +++++ bootstrap/main.cc | 49 ++++++++++ bootstrap/meson.build | 13 +++ bootstrap/xlang.g4 | 52 +++++++++++ meson.build | 40 +++++++++ test/42.x | 3 + test/fib.x | 18 ++++ test/hello.ssa | 9 ++ test/meson.build | 21 +++++ 13 files changed, 481 insertions(+) create mode 100644 .clang-format create mode 100644 .clang-format-include create mode 100644 .editorconfig create mode 100644 bootstrap/emit.cc create mode 100644 bootstrap/emit.hh create mode 100644 bootstrap/main.cc create mode 100644 bootstrap/meson.build create mode 100644 bootstrap/xlang.g4 create mode 100644 meson.build create mode 100644 test/42.x create mode 100644 test/fib.x create mode 100644 test/hello.ssa create mode 100644 test/meson.build diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..5f8be78 --- /dev/null +++ b/.clang-format @@ -0,0 +1,205 @@ +--- +Language: Cpp +# BasedOnStyle: Google +AccessModifierOffset: -1 +AlignAfterOpenBracket: Align +AlignArrayOfStructures: None +AlignConsecutiveMacros: None +AlignConsecutiveAssignments: None +AlignConsecutiveBitFields: None +AlignConsecutiveDeclarations: None +AlignEscapedNewlines: Left +AlignOperands: Align +AlignTrailingComments: true +AllowAllArgumentsOnNextLine: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortEnumsOnASingleLine: false +AllowShortBlocksOnASingleLine: Never +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: Empty +AllowShortLambdasOnASingleLine: All +AllowShortIfStatementsOnASingleLine: Never +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: true +AlwaysBreakTemplateDeclarations: Yes +AttributeMacros: + - __capability +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterCaseLabel: false + AfterClass: false + AfterControlStatement: Never + AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + AfterExternBlock: false + BeforeCatch: false + BeforeElse: false + BeforeLambdaBody: false + BeforeWhile: false + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakBeforeBinaryOperators: None +BreakBeforeConceptDeclarations: true +BreakBeforeBraces: Attach +BreakBeforeInheritanceComma: false +BreakInheritanceList: BeforeColon +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +BreakConstructorInitializers: BeforeColon +BreakAfterJavaFieldAnnotations: false +BreakStringLiterals: true +ColumnLimit: 80 +CommentPragmas: '^ IWYU pragma:' +CompactNamespaces: false +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DeriveLineEnding: true +DerivePointerAlignment: true +DisableFormat: false +EmptyLineAfterAccessModifier: Never +EmptyLineBeforeAccessModifier: LogicalBlock +ExperimentalAutoDetectBinPacking: false +BasedOnStyle: '' +ConstructorInitializerAllOnOneLineOrOnePerLine: true +AllowAllConstructorInitializersOnNextLine: true +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IfMacros: + - KJ_IF_MAYBE +IncludeBlocks: Regroup +IncludeCategories: + - Regex: '^$' + Priority: 3 + - Regex: '^$' + Priority: 1 + - Regex: '^<.*\.h>$' + Priority: 2 + - Regex: '.*' + Priority: 3 +IncludeIsMainRegex: '([-_](test|unittest))?$' +IncludeIsMainSourceRegex: '' +IndentAccessModifiers: false +IndentCaseLabels: true +IndentCaseBlocks: false +IndentGotoLabels: true +IndentPPDirectives: None +IndentExternBlock: AfterExternBlock +IndentRequires: false +IndentWidth: 2 +IndentWrappedFunctionNames: false +InsertTrailingCommas: None +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: false +LambdaBodyIndentation: Signature +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBinPackProtocolList: Never +ObjCBlockIndentWidth: 2 +ObjCBreakBeforeNestedBlockParam: true +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 1 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 200 +PenaltyIndentedWhitespace: 0 +PointerAlignment: Left +PPIndentWidth: -1 +RawStringFormats: + - Language: Cpp + Delimiters: + - cc + - CC + - cpp + - Cpp + - CPP + - 'c++' + - 'C++' + CanonicalDelimiter: '' + BasedOnStyle: google + - Language: TextProto + Delimiters: + - pb + - PB + - proto + - PROTO + EnclosingFunctions: + - EqualsProto + - EquivToProto + - PARSE_PARTIAL_TEXT_PROTO + - PARSE_TEST_PROTO + - PARSE_TEXT_PROTO + - ParseTextOrDie + - ParseTextProtoOrDie + - ParseTestProto + - ParsePartialTestProto + CanonicalDelimiter: pb + BasedOnStyle: google +ReferenceAlignment: Pointer +ReflowComments: true +ShortNamespaceLines: 1 +SortIncludes: CaseSensitive +SortJavaStaticImport: Before +SortUsingDeclarations: true +SpaceAfterCStyleCast: false +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeCaseColon: false +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeParens: ControlStatements +SpaceAroundPointerQualifiers: Default +SpaceBeforeRangeBasedForLoopColon: true +SpaceInEmptyBlock: false +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 2 +SpacesInAngles: Never +SpacesInConditionalStatement: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInLineCommentPrefix: + Minimum: 1 + Maximum: -1 +SpacesInParentheses: false +SpacesInSquareBrackets: false +SpaceBeforeSquareBrackets: false +BitFieldColonSpacing: Both +Standard: Auto +StatementAttributeLikeMacros: + - Q_EMIT +StatementMacros: + - Q_UNUSED + - QT_REQUIRE_VERSION +TabWidth: 8 +UseCRLF: false +UseTab: Never +WhitespaceSensitiveMacros: + - STRINGIZE + - PP_STRINGIZE + - BOOST_PP_STRINGIZE + - NS_SWIFT_NAME + - CF_SWIFT_NAME +... + diff --git a/.clang-format-include b/.clang-format-include new file mode 100644 index 0000000..f1fa6d7 --- /dev/null +++ b/.clang-format-include @@ -0,0 +1 @@ +bootstrap/**/* diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..8712eb4 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,10 @@ +root = true + +[*] +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true +charset = utf-8 +indent_style = space +indent_size = 2 +max_line_length = 80 diff --git a/bootstrap/emit.cc b/bootstrap/emit.cc new file mode 100644 index 0000000..e478853 --- /dev/null +++ b/bootstrap/emit.cc @@ -0,0 +1,38 @@ +#include +#include + +namespace xlang { + +EmitListener::EmitListener(std::string_view outputfile) : output{outputfile} {} + +void EmitListener::enterFile(xlangParser::FileContext *ctx) { + output << "data $printformat = { b \"%ld\\n\", b 0 }" << std::endl; + (void)ctx; +} + +void EmitListener::enterFunction(xlangParser::FunctionContext *ctx) { + output << std::endl + << "export function w $" << ctx->Identifier()->getSymbol()->getText() + << "()" << std::endl + << "{" << std::endl + << "@start" << std::endl; +} + +void EmitListener::exitFunction(xlangParser::FunctionContext *ctx) { + output << " ret 0" << std::endl << "}" << std::endl; + (void)ctx; +} + +void EmitListener::exitStatement(xlangParser::StatementContext *ctx) { + if (ctx->Print()) { + output << " call $printf(l $printformat, ..., w %v)" << std::endl; + } +} + +void EmitListener::exitFactor(xlangParser::FactorContext *ctx) { + if (auto integer = ctx->Integer()) { + output << " %v = w copy " << integer->getSymbol()->getText() << std::endl; + } +} + +} // namespace xlang diff --git a/bootstrap/emit.hh b/bootstrap/emit.hh new file mode 100644 index 0000000..405f63b --- /dev/null +++ b/bootstrap/emit.hh @@ -0,0 +1,22 @@ +#pragma once + +#include +#include +#include + +namespace xlang { + +class EmitListener : public xlangBaseListener { + std::ofstream output; + + public: + EmitListener(std::string_view outputfile); + + void enterFile(xlangParser::FileContext *ctx) override; + void enterFunction(xlangParser::FunctionContext *ctx) override; + void exitFunction(xlangParser::FunctionContext *ctx) override; + void exitStatement(xlangParser::StatementContext *ctx) override; + void exitFactor(xlangParser::FactorContext *ctx) override; +}; + +} // namespace xlang diff --git a/bootstrap/main.cc b/bootstrap/main.cc new file mode 100644 index 0000000..4c4c6c3 --- /dev/null +++ b/bootstrap/main.cc @@ -0,0 +1,49 @@ +#include +#include + +#include +#include +#include +#include +#include + +static void usage() { + std::cerr << getprogname() << " [-o output.ssa] input.x" << std::endl; + exit(1); +} + +int main(int argc, char **argv) { + int opt; + std::string inputfile; + std::string outputfile; + + while ((opt = getopt(argc, argv, "o:")) != -1) { + switch (opt) { + case 'o': + outputfile = optarg; + break; + default: + usage(); + } + } + argc -= optind; + argv += optind; + if (argc != 1) { + usage(); + } + inputfile = argv[0]; + if (!outputfile.length()) { + outputfile = "/dev/stdout"; + } + + std::ifstream inputstream{inputfile}; + antlr4::ANTLRInputStream input{inputstream}; + xlang::xlangLexer lexer{&input}; + antlr4::CommonTokenStream tokens{&lexer}; + xlang::xlangParser parser{&tokens}; + auto *tree = parser.file(); + + xlang::EmitListener emit{outputfile}; + antlr4::tree::ParseTreeWalker::DEFAULT.walk(&emit, tree); + return 0; +} diff --git a/bootstrap/meson.build b/bootstrap/meson.build new file mode 100644 index 0000000..989be3d --- /dev/null +++ b/bootstrap/meson.build @@ -0,0 +1,13 @@ +xc_exe = executable('xc', + sources : [ + 'main.cc', + 'emit.cc', + antlr4.process('xlang.g4'), + ], + dependencies : [ + antlr4_runtime_dep, + ], + install : true) +xc = generator(xc_exe, + arguments : ['-o', '@OUTPUT@', '@INPUT@'], + output : '@BASENAME@.ssa') diff --git a/bootstrap/xlang.g4 b/bootstrap/xlang.g4 new file mode 100644 index 0000000..0c8c36d --- /dev/null +++ b/bootstrap/xlang.g4 @@ -0,0 +1,52 @@ +grammar xlang; + +file : function+ EOF; +function : Identifier LeftParen argumentList? RightParen block; +argumentList : Identifier (Comma Identifier)*; +block : LeftBrace statement* RightBrace; +statement : Identifier Assign expr Semicolon + | If expr block (Else block)? + | While expr block + | Return expr Semicolon + | Print expr Semicolon + | expr Semicolon + ; +expr : sum ((Less|LessEqual|Greater|GreaterEqual|Equal|NotEqual) sum)*; +sum : term ((Plus|Minus) term)*; +term : factor ((Mul|Div) factor)*; +factor : Integer + | Identifier + | Identifier LeftParen exprList? RightParen + | LeftParen expr RightParen + ; +exprList : expr (Comma expr)*; + +If : 'if'; +Else : 'else'; +While : 'while'; +Return : 'return'; +Print : 'print'; + +LeftParen : '('; +RightParen : ')'; +LeftBrace : '{'; +RightBrace : '}'; +Assign : '='; +Less : '<'; +LessEqual : '<='; +Greater : '>'; +GreaterEqual : '>='; +Equal : '=='; +NotEqual : '!='; +Plus : '+'; +Minus : '-'; +Mul : '*'; +Div : '/'; +Comma : ','; +Semicolon : ';'; + +Identifier : [a-zA-Z][a-zA-Z0-9]*; +Integer : [0-9]+; + +Comment : '//' ~[\n]* '\n' -> skip; +Whitespace : [ \t\r\n]+ -> skip; diff --git a/meson.build b/meson.build new file mode 100644 index 0000000..019f676 --- /dev/null +++ b/meson.build @@ -0,0 +1,40 @@ +project('xlang', ['cpp', 'c'], + version : '0.1', + license : 'ISC', + default_options : ['buildtype=debugoptimized', + 'warning_level=3', + 'cpp_std=c++20']) + +antlr4_runtime_dep = dependency('antlr4-runtime', modules : ['antlr4_shared']) +antlr4_exe = find_program('antlr4') +antlr4 = generator(antlr4_exe, + arguments : [ + '-o', '@BUILD_DIR@', + '-package', meson.project_name(), + '-visitor', + '-Dlanguage=Cpp', + '-Xexact-output-dir', + '@INPUT@', + ], + output : [ + '@BASENAME@Lexer.h', + '@BASENAME@Lexer.cpp', + '@BASENAME@Parser.h', + '@BASENAME@Parser.cpp', + '@BASENAME@Visitor.h', + '@BASENAME@Visitor.cpp', + '@BASENAME@BaseVisitor.h', + '@BASENAME@BaseVisitor.cpp', + '@BASENAME@Listener.h', + '@BASENAME@Listener.cpp', + '@BASENAME@BaseListener.h', + '@BASENAME@BaseListener.cpp', + ]) + +qbe_exe = find_program('qbe') +qbe = generator(qbe_exe, + arguments : ['-o', '@OUTPUT@', '@INPUT@'], + output : '@BASENAME@.S') + +subdir('bootstrap') +subdir('test') diff --git a/test/42.x b/test/42.x new file mode 100644 index 0000000..fd51ea0 --- /dev/null +++ b/test/42.x @@ -0,0 +1,3 @@ +main() { + print 42; +} diff --git a/test/fib.x b/test/fib.x new file mode 100644 index 0000000..f6133fb --- /dev/null +++ b/test/fib.x @@ -0,0 +1,18 @@ +main() { + // print 5th fibonacci number + print fib(5); + return 0; +} + +fib(n) { + x0 = 1; + x1 = 1; + i = 0; + while i < n { + t = x0 + x1; + x0 = x1; + x1 = t; + i = i + 1; + } + return x0; +} diff --git a/test/hello.ssa b/test/hello.ssa new file mode 100644 index 0000000..f7297ae --- /dev/null +++ b/test/hello.ssa @@ -0,0 +1,9 @@ +# Define the string constant. +data $str = { b "hello world", b 0 } + +export function w $main() { +@start + # Call the puts function with $str as argument. + %r = w call $puts(l $str) + ret 0 +} diff --git a/test/meson.build b/test/meson.build new file mode 100644 index 0000000..b30449d --- /dev/null +++ b/test/meson.build @@ -0,0 +1,21 @@ +executable('hello', + sources : qbe.process('hello.ssa'), + link_args : ['-static']) + +# XXX why is this needed? +custom_target('42.ssa', + command : ['cp', '@INPUT@', '@OUTPUT@'], + input : xc.process('42.x'), + output : '@BASENAME@.ssa') +executable('42', + sources : qbe.process(xc.process('42.x')), + link_args : ['-static']) + +# XXX why is this needed? +custom_target('fib.ssa', + command : ['cp', '@INPUT@', '@OUTPUT@'], + input : xc.process('fib.x'), + output : '@BASENAME@.ssa') +executable('fib', + sources : qbe.process(xc.process('fib.x')), + link_args : ['-static'])