From adc9a2dde155558e120509d298de53122cd82d2f Mon Sep 17 00:00:00 2001 From: zc <2650838+Wack0@users.noreply.github.com> Date: Tue, 11 Jul 2023 17:57:57 +0100 Subject: [PATCH] cc: add timer for each stage (via compile time config) cc: optimise lexer for speed (~10x speed improvement) cc: implement OperatorParser class as per TODO in original codebase (seems refactoring chains of parser-combinators leads to at least SOME speed improvement) --- LibIFPSCC/Driver/Compiler.cs | 19 ++++ LibIFPSCC/Parser/CParsers.cs | 148 ++++++++++++++++++++++++-------- LibIFPSCC/Scanner/Char.cs | 13 +-- LibIFPSCC/Scanner/Float.cs | 11 +-- LibIFPSCC/Scanner/Identifier.cs | 14 +-- LibIFPSCC/Scanner/Int.cs | 11 +-- LibIFPSCC/Scanner/Operator.cs | 13 +-- LibIFPSCC/Scanner/Scanner.cs | 41 ++++++--- LibIFPSCC/Scanner/String.cs | 21 ++--- 9 files changed, 207 insertions(+), 84 deletions(-) diff --git a/LibIFPSCC/Driver/Compiler.cs b/LibIFPSCC/Driver/Compiler.cs index 772b3c9..0735a3b 100644 --- a/LibIFPSCC/Driver/Compiler.cs +++ b/LibIFPSCC/Driver/Compiler.cs @@ -8,30 +8,49 @@ using Parsing; namespace Driver { public class Compiler { + private static readonly bool SHOW_TIME = false; + private static System.Diagnostics.Stopwatch StartTimer() + { + var watch = new System.Diagnostics.Stopwatch(); + watch.Start(); + return watch; + } private Compiler(String source) { this.Source = source; // Lexical analysis + var watch = StartTimer(); Scanner scanner = new Scanner(source); this.Tokens = scanner.Tokens.ToImmutableList(); + watch.Stop(); + if (SHOW_TIME) Console.WriteLine("lexer: {0} ms", watch.ElapsedMilliseconds); // Parse + watch = StartTimer(); var parserResult = CParsers.Parse(this.Tokens); + watch.Stop(); + if (SHOW_TIME) Console.WriteLine("parser: {0} ms", watch.ElapsedMilliseconds); if (!parserResult.IsSuccessful || parserResult.Source.Count() != 1) { throw new InvalidOperationException($"Parsing error:\n{parserResult}"); } this.SyntaxTree = parserResult.Result; // Semantic analysis + watch = StartTimer(); var semantReturn = this.SyntaxTree.GetTranslnUnit(); + watch.Stop(); + if (SHOW_TIME) Console.WriteLine("semant: {0} ms", watch.ElapsedMilliseconds); this.AbstractSyntaxTree = semantReturn.Value; this.Environment = semantReturn.Env; // Code generation + watch = StartTimer(); var state = new CGenState(); this.AbstractSyntaxTree.CodeGenerate(state); state.EmitCallsToCtor(); this.Script = state.Script; + watch.Stop(); + if (SHOW_TIME) Console.WriteLine("codegen: {0} ms", watch.ElapsedMilliseconds); } public static Compiler FromSource(String src) { diff --git a/LibIFPSCC/Parser/CParsers.cs b/LibIFPSCC/Parser/CParsers.cs index 95dc3f1..d2e70ae 100644 --- a/LibIFPSCC/Parser/CParsers.cs +++ b/LibIFPSCC/Parser/CParsers.cs @@ -7,8 +7,10 @@ using static Parsing.ParserCombinator; using System.Collections.Immutable; namespace Parsing { - public partial class CParsers { - static CParsers() { + public partial class CParsers + { + static CParsers() + { SetExpressionRules(); SetDeclarationRules(); SetExternalDefinitionRules(); @@ -18,44 +20,56 @@ namespace Parsing { public static IParserResult Parse(IEnumerable tokens) => TranslationUnit.Parse(new ParserInput(new ParserEnvironment(), tokens)); - public class ConstCharParser : IParser { + public class ConstCharParser : IParser + { public RuleCombining Combining => RuleCombining.NONE; - public IParserResult Parse(ParserInput input) { + public IParserResult Parse(ParserInput input) + { var token = input.Source.First() as TokenCharConst; - if (token == null) { + if (token == null) + { return new ParserFailed(input); } return ParserSucceeded.Create(new IntLiteral(token.Value, TokenInt.IntSuffix.NONE), input.Environment, input.Source.Skip(1)); } } - public class ConstIntParser : IParser { + public class ConstIntParser : IParser + { public RuleCombining Combining => RuleCombining.NONE; - public IParserResult Parse(ParserInput input) { + public IParserResult Parse(ParserInput input) + { var token = input.Source.First() as TokenInt; - if (token == null) { + if (token == null) + { return new ParserFailed(input); } return ParserSucceeded.Create(new IntLiteral(token.Val, token.Suffix), input.Environment, input.Source.Skip(1)); } } - public class ConstFloatParser : IParser { + public class ConstFloatParser : IParser + { public RuleCombining Combining => RuleCombining.NONE; - public IParserResult Parse(ParserInput input) { + public IParserResult Parse(ParserInput input) + { var token = input.Source.First() as TokenFloat; - if (token == null) { + if (token == null) + { return new ParserFailed(input); } return ParserSucceeded.Create(new FloatLiteral(token.Value, token.Suffix), input.Environment, input.Source.Skip(1)); } } - public class StringLiteralParser : IParser { + public class StringLiteralParser : IParser + { public RuleCombining Combining => RuleCombining.NONE; - public IParserResult Parse(ParserInput input) { + public IParserResult Parse(ParserInput input) + { var token = input.Source.First() as TokenString; - if (token == null) { + if (token == null) + { return new ParserFailed(input); } return ParserSucceeded.Create(new StringLiteral(token.Val), input.Environment, input.Source.Skip(1)); @@ -76,8 +90,10 @@ namespace Parsing { } } - public class BinaryOperatorBuilder { - public BinaryOperatorBuilder(IConsumer operatorConsumer, Func nodeCreator) { + public class BinaryOperatorBuilder + { + public BinaryOperatorBuilder(IConsumer operatorConsumer, Func nodeCreator) + { this.OperatorConsumer = operatorConsumer; this.NodeCreator = nodeCreator; } @@ -89,29 +105,93 @@ namespace Parsing { public Func NodeCreator { get; } } - // TODO: create a dedicated class for this. - public static IParser BinaryOperator(IParser operandParser, params BinaryOperatorBuilder[] builders) { - ImmutableList> transformers = builders.Select(builder => - Given() - .Then(builder.OperatorConsumer) - .Then(operandParser) - .Then(builder.NodeCreator) - ).ToImmutableList(); - return operandParser.Then((new OrTransformer(transformers)).ZeroOrMore()); + public class OperatorParser : IParser + { + private IParser lhsParser; + private IParser rhsParser; + private readonly ImmutableList builders; + private readonly bool needsOne; + + public OperatorParser(IParser operandParser, IEnumerable builders) : this(operandParser, operandParser, builders) + { + needsOne = false; + } + + public OperatorParser(IParser lhsParser, IParser rhsParser, IEnumerable builders) + { + this.lhsParser = lhsParser; + this.rhsParser = rhsParser; + this.builders = builders.ToImmutableList(); + needsOne = true; + } + + public RuleCombining Combining => RuleCombining.THEN; + + public IParserResult Parse(ParserInput input) + { + var firstResult = lhsParser.Parse(input); + if (!firstResult.IsSuccessful) + { + return new ParserFailed(firstResult); + } + + return Transform(firstResult.Result, firstResult.ToInput()); + } + + private IParserResult TransformImpl(Expr seed, ParserInput input) + { + List failed = new List(); + foreach (var builder in builders) { + var given = ParserSucceeded.Create(seed, input.Environment, input.Source); + var result1 = builder.OperatorConsumer.Consume(given.ToInput()); + if (!result1.IsSuccessful) + { + failed.Add(new ParserFailed(result1)); + continue; + } + var result2 = rhsParser.Parse(result1.ToInput()); + if (!result2.IsSuccessful) + { + failed.Add(new ParserFailed(result2)); + continue; + } + + var transform = builder.NodeCreator(seed, result2.Result); + var ret = ParserSucceeded.Create(transform, result2.Environment, result2.Source); + var expr = transform as IStoredLineInfo; + if (expr != null) + { + expr.Copy(ret); + } + return ret; + } + return new ParserFailed(input, failed); + } + + public IParserResult Transform(Expr seed, ParserInput input) + { + IParserResult curResult = needsOne ? TransformImpl(seed, input) : ParserSucceeded.Create(seed, input.Environment, input.Source); + + if (!curResult.IsSuccessful) return new ParserFailed(curResult); + + IParserResult lastSuccessfulResult; + do + { + lastSuccessfulResult = curResult; + curResult = TransformImpl(lastSuccessfulResult.Result, lastSuccessfulResult.ToInput()); + } while (curResult.IsSuccessful); + + return lastSuccessfulResult; + } } + public static IParser BinaryOperator(IParser operandParser, params BinaryOperatorBuilder[] builders) + => new OperatorParser(operandParser, builders); + public static IParser AssignmentOperator( IParser lhsParser, IParser rhsParser, params BinaryOperatorBuilder[] builders - ) { - var transformers = builders.Select(builder => - Given() - .Then(builder.OperatorConsumer) - .Then(rhsParser) - .Then(builder.NodeCreator) - ).ToImmutableList(); - return lhsParser.Then((new OrTransformer(transformers)).OneOrMore()); - } + ) => new OperatorParser(lhsParser, rhsParser, builders); } } \ No newline at end of file diff --git a/LibIFPSCC/Scanner/Char.cs b/LibIFPSCC/Scanner/Char.cs index 2654c02..c4104c2 100644 --- a/LibIFPSCC/Scanner/Char.cs +++ b/LibIFPSCC/Scanner/Char.cs @@ -1,4 +1,5 @@ using System; +using System.Text; namespace LexicalAnalysis { /// @@ -61,7 +62,7 @@ namespace LexicalAnalysis { } private State _state; - private String _scanned; + private StringBuilder _scanned; // quote : Char // ============ @@ -71,11 +72,11 @@ namespace LexicalAnalysis { public FSAChar(Char quote) { this._state = State.START; this._quote = quote; - this._scanned = ""; + this._scanned = new StringBuilder(); } public override void Reset() { - this._scanned = ""; + this._scanned.Clear(); this._state = State.START; } @@ -106,7 +107,7 @@ namespace LexicalAnalysis { // ========================== // public String RetrieveRaw() { - return this._scanned.Substring(0, this._scanned.Length - 1); + return this._scanned.ToString(0, this._scanned.Length - 1); } // RetrieveChar : () -> Char @@ -157,7 +158,7 @@ namespace LexicalAnalysis { // Implementation of the FSA // public override void ReadChar(Char ch) { - this._scanned = this._scanned + ch; + this._scanned = this._scanned.Append(ch); switch (this._state) { case State.END: case State.ERROR: @@ -230,7 +231,7 @@ namespace LexicalAnalysis { // ================== // public override void ReadEOF() { - this._scanned = this._scanned + '0'; + this._scanned = this._scanned.Append('0'); switch (this._state) { case State.C: case State.SO: diff --git a/LibIFPSCC/Scanner/Float.cs b/LibIFPSCC/Scanner/Float.cs index 29fb746..ec96e88 100644 --- a/LibIFPSCC/Scanner/Float.cs +++ b/LibIFPSCC/Scanner/Float.cs @@ -1,4 +1,5 @@ using System; +using System.Text; namespace LexicalAnalysis { /// @@ -60,7 +61,7 @@ namespace LexicalAnalysis { DPL }; - private String _raw; + private StringBuilder _raw; private Int64 _intPart; private Int64 _fracPart; private Int64 _fracCount; @@ -77,7 +78,7 @@ namespace LexicalAnalysis { this._expPart = 0; this._suffix = TokenFloat.FloatSuffix.NONE; this._expPos = true; - this._raw = ""; + this._raw = new StringBuilder(); } public override void Reset() { @@ -88,7 +89,7 @@ namespace LexicalAnalysis { this._expPart = 0; this._suffix = TokenFloat.FloatSuffix.NONE; this._expPos = true; - this._raw = ""; + this._raw.Clear(); } public override FSAStatus GetStatus() { @@ -111,11 +112,11 @@ namespace LexicalAnalysis { } else { val = (this._intPart + this._fracPart * Math.Pow(0.1, this._fracCount)) * Math.Pow(10, -this._expPart); } - return new TokenFloat(val, this._suffix, this._raw.Substring(0, this._raw.Length - 1)); + return new TokenFloat(val, this._suffix, this._raw.ToString(0, this._raw.Length - 1)); } public override void ReadChar(Char ch) { - this._raw += ch; + this._raw.Append(ch); switch (this._state) { case State.ERROR: case State.END: diff --git a/LibIFPSCC/Scanner/Identifier.cs b/LibIFPSCC/Scanner/Identifier.cs index 59a6b3d..8c0d2d3 100644 --- a/LibIFPSCC/Scanner/Identifier.cs +++ b/LibIFPSCC/Scanner/Identifier.cs @@ -1,4 +1,6 @@ using System; +using System.Linq; +using System.Text; namespace LexicalAnalysis { /// @@ -24,16 +26,16 @@ namespace LexicalAnalysis { ID }; private State _state; - private String _scanned; + private StringBuilder _scanned; public FSAIdentifier() { this._state = State.START; - this._scanned = ""; + this._scanned = new StringBuilder(); } public override void Reset() { this._state = State.START; - this._scanned = ""; + this._scanned.Clear(); } public override FSAStatus GetStatus() { @@ -50,7 +52,7 @@ namespace LexicalAnalysis { } public override Token RetrieveToken() { - String name = this._scanned.Substring(0, this._scanned.Length - 1); + String name = this._scanned.ToString(0, this._scanned.Length - 1); if (TokenKeyword.Keywords.ContainsKey(name)) { return new TokenKeyword(TokenKeyword.Keywords[name]); } @@ -58,7 +60,7 @@ namespace LexicalAnalysis { } public override void ReadChar(Char ch) { - this._scanned = this._scanned + ch; + this._scanned = this._scanned.Append(ch); switch (this._state) { case State.END: case State.ERROR: @@ -82,7 +84,7 @@ namespace LexicalAnalysis { } public override void ReadEOF() { - this._scanned = this._scanned + '0'; + this._scanned = this._scanned.Append('0'); switch (this._state) { case State.ID: this._state = State.END; diff --git a/LibIFPSCC/Scanner/Int.cs b/LibIFPSCC/Scanner/Int.cs index dee490f..27cb28b 100644 --- a/LibIFPSCC/Scanner/Int.cs +++ b/LibIFPSCC/Scanner/Int.cs @@ -1,4 +1,5 @@ using System; +using System.Text; namespace LexicalAnalysis { /// @@ -69,21 +70,21 @@ namespace LexicalAnalysis { }; private Int64 _val; - private String _raw; + private StringBuilder _raw; private TokenInt.IntSuffix _suffix; private State _state; public FSAInt() { this._state = State.START; this._val = 0; - this._raw = ""; + this._raw = new StringBuilder(); this._suffix = TokenInt.IntSuffix.NONE; } public override void Reset() { this._state = State.START; this._val = 0; - this._raw = ""; + this._raw.Clear(); this._suffix = TokenInt.IntSuffix.NONE; } @@ -101,11 +102,11 @@ namespace LexicalAnalysis { } public override Token RetrieveToken() { - return new TokenInt(this._val, this._suffix, this._raw.Substring(0, this._raw.Length - 1)); + return new TokenInt(this._val, this._suffix, this._raw.ToString(0, this._raw.Length - 1)); } public override void ReadChar(Char ch) { - this._raw += ch; + this._raw.Append(ch); switch (this._state) { case State.ERROR: case State.END: diff --git a/LibIFPSCC/Scanner/Operator.cs b/LibIFPSCC/Scanner/Operator.cs index 12f2d2b..45f2311 100644 --- a/LibIFPSCC/Scanner/Operator.cs +++ b/LibIFPSCC/Scanner/Operator.cs @@ -2,6 +2,7 @@ using System.Collections.Generic; using System.Collections.Immutable; using System.Linq; +using System.Text; namespace LexicalAnalysis { /// @@ -166,16 +167,16 @@ namespace LexicalAnalysis { ); private State _state; - private String _scanned; + private StringBuilder _scanned; public FSAOperator() { this._state = State.START; - this._scanned = ""; + this._scanned = new StringBuilder(); } public override sealed void Reset() { this._state = State.START; - this._scanned = ""; + this._scanned.Clear(); } public override sealed FSAStatus GetStatus() { @@ -192,11 +193,11 @@ namespace LexicalAnalysis { } public override sealed Token RetrieveToken() { - return new TokenOperator(TokenOperator.Operators[this._scanned.Substring(0, this._scanned.Length - 1)]); + return new TokenOperator(TokenOperator.Operators[this._scanned.ToString(0, this._scanned.Length - 1)]); } public override sealed void ReadChar(Char ch) { - this._scanned = this._scanned + ch; + this._scanned = this._scanned.Append(ch); switch (this._state) { case State.END: case State.ERROR: @@ -386,7 +387,7 @@ namespace LexicalAnalysis { } public override sealed void ReadEOF() { - this._scanned = this._scanned + '0'; + this._scanned = this._scanned.Append('0'); switch (this._state) { case State.FINISH: case State.SUB: diff --git a/LibIFPSCC/Scanner/Scanner.cs b/LibIFPSCC/Scanner/Scanner.cs index 2e3ad24..8a54a5b 100644 --- a/LibIFPSCC/Scanner/Scanner.cs +++ b/LibIFPSCC/Scanner/Scanner.cs @@ -36,21 +36,31 @@ namespace LexicalAnalysis { private IEnumerable Lex() { var tokens = new List(); int line = 1, column = 1, lastColumn = column; + char lastChr = '\0'; for (Int32 i = 0; i < this.Source.Length; ++i) { - if (i > 0 && this.Source[i - 1] == '\n') + if (lastChr == '\n') { line++; lastColumn = 1; column = 1; } else column++; - this.FSAs.ForEach(fsa => fsa.ReadChar(this.Source[i])); + bool isRunning = false; + int endIdx = -1; + var chr = Source[i]; + for (int fsaIdx = 0; fsaIdx < FSAs.Count; fsaIdx++) + { + var fsa = FSAs[fsaIdx]; + fsa.ReadChar(chr); + var status = fsa.GetStatus(); + if (status == FSAStatus.RUNNING) isRunning = true; + else if (endIdx == -1 && status == FSAStatus.END) endIdx = fsaIdx; + } // if no running - if (this.FSAs.FindIndex(fsa => fsa.GetStatus() == FSAStatus.RUNNING) == -1) { - Int32 idx = this.FSAs.FindIndex(fsa => fsa.GetStatus() == FSAStatus.END); - if (idx != -1) { - Token token = this.FSAs[idx].RetrieveToken(); + if (!isRunning) { + if (endIdx != -1) { + Token token = this.FSAs[endIdx].RetrieveToken(); if (token.Kind != TokenKind.NONE) { token.Line = line; token.Column = lastColumn; @@ -58,19 +68,26 @@ namespace LexicalAnalysis { tokens.Add(token); } i--; column--; - if (this.Source[i] == '\n') line--; - this.FSAs.ForEach(fsa => fsa.Reset()); + if (lastChr == '\n') line--; + foreach (var fsa in FSAs) fsa.Reset(); } else { Console.WriteLine("error"); } } + if (!isRunning || endIdx == -1) lastChr = chr; } - this.FSAs.ForEach(fsa => fsa.ReadEOF()); + var endIdx2 = -1; + for (int fsaIdx = 0; fsaIdx < FSAs.Count; fsaIdx++) + { + var fsa = FSAs[fsaIdx]; + fsa.ReadEOF(); + if (endIdx2 != -1) continue; + if (fsa.GetStatus() == FSAStatus.END) endIdx2 = fsaIdx; + } // find END - Int32 idx2 = this.FSAs.FindIndex(fsa => fsa.GetStatus() == FSAStatus.END); - if (idx2 != -1) { - Token token = this.FSAs[idx2].RetrieveToken(); + if (endIdx2 != -1) { + Token token = this.FSAs[endIdx2].RetrieveToken(); if (token.Kind != TokenKind.NONE) { token.Line = line; token.Column = column + 1; diff --git a/LibIFPSCC/Scanner/String.cs b/LibIFPSCC/Scanner/String.cs index b79b4e8..6b22714 100644 --- a/LibIFPSCC/Scanner/String.cs +++ b/LibIFPSCC/Scanner/String.cs @@ -1,4 +1,5 @@ using System; +using System.Text; namespace LexicalAnalysis { /// @@ -54,22 +55,22 @@ namespace LexicalAnalysis { private State _state; private readonly FSAChar _fsachar; - private String _val; - private String _raw; + private StringBuilder _val; + private StringBuilder _raw; private bool unicode = false; public FSAString() { this._state = State.START; this._fsachar = new FSAChar('\"'); - this._raw = ""; - this._val = ""; + this._raw = new StringBuilder(); + this._val = new StringBuilder(); } public override void Reset() { this._state = State.START; this._fsachar.Reset(); - this._raw = ""; - this._val = ""; + this._raw.Clear(); + this._val.Clear(); unicode = false; } @@ -87,8 +88,8 @@ namespace LexicalAnalysis { } public override Token RetrieveToken() { - if (unicode) return new TokenUnicodeString(this._val, this._raw); - return new TokenString(this._val, this._raw); + if (unicode) return new TokenUnicodeString(this._val.ToString(), this._raw.ToString()); + return new TokenString(this._val.ToString(), this._raw.ToString()); } public override void ReadChar(Char ch) { @@ -129,8 +130,8 @@ namespace LexicalAnalysis { switch (this._fsachar.GetStatus()) { case FSAStatus.END: this._state = State.Q; - this._val = this._val + this._fsachar.RetrieveChar(); - this._raw = this._raw + this._fsachar.RetrieveRaw(); + this._val.Append(this._fsachar.RetrieveChar()); + this._raw.Append(this._fsachar.RetrieveRaw()); this._fsachar.Reset(); ReadChar(ch); break;