mirror of
https://github.com/Wack0/IFPSTools.NET.git
synced 2025-06-18 10:45:36 -04:00

cc: optimise lexer for speed (~10x speed improvement) cc: implement OperatorParser class as per TODO in original codebase (seems refactoring chains of parser-combinators leads to at least SOME speed improvement)
379 lines
12 KiB
C#
379 lines
12 KiB
C#
using System;
|
|
using System.Text;
|
|
|
|
namespace LexicalAnalysis {
|
|
/// <summary>
/// Token representing a character constant.
/// Carries both the source text of the character and its decoded value.
/// </summary>
public sealed class TokenCharConst : Token {
    /// <summary>
    /// Creates a character-constant token.
    /// </summary>
    /// <param name="raw">The text of the character as it was scanned.</param>
    /// <param name="value">The character value the text stands for.</param>
    public TokenCharConst(String raw, Char value) {
        Raw = raw;
        Value = value;
    }

    /// <summary>Always <c>TokenKind.CHAR</c> for this token type.</summary>
    public override TokenKind Kind => TokenKind.CHAR;

    /// <summary>The text of the character as it was scanned.</summary>
    public String Raw { get; }

    /// <summary>The character value.</summary>
    public Char Value { get; }

    /// <summary>
    /// Formats the token as "KIND [line:column]: 'raw'".
    /// Line and Column are not declared in this class (presumably inherited from Token).
    /// </summary>
    public override String ToString() {
        return $"{Kind} [{Line}:{Column}]: '{Raw}'";
    }
}
|
|
|
|
/// <summary>
/// The FSA for scanning a C character.
/// Note that this FSA doesn't scan the surrounding quotes.
/// It is used in both FSACharConst and FSAString.
///
/// There are multiple ways to represent a character:
/// * A normal character : any character other than \\ \n or [quote]
///     Note that [quote] might be \' or \" depending on the context.
///     For example, inside a String, single quotes are allowed, which means that the following code is legal:
///       Char *str = "single quote here: ' see that?";
///
///     However, if we need a double quote inside a String, we have to use an escape character, like this:
///       Char *str = "double quote needs to be escaped: \" ";
///
///     Inside a Char, double quotes are allowed while single quotes need to be escaped.
///       Char double_quote = '"';  // allowed
///       Char single_quote = '\''; // needs to be escaped
///
/// * An escape character : \a \b \f \n \r \t \v \' \" \\ \?
///     Note that even though \' and \" might not need to be escaped, you can always write them escaped.
///     If you escape a character not listed above, the behavior is undefined in the standard.
///     RetrieveChar then yields the unescaped character (for example '\c' is treated as 'c');
///     whether such a sequence is accepted by the scanner at all is decided by Utils.IsEscapeChar.
///
/// * An octal number (one to three digits) after a backslash. For example : \123.
///
/// * A hexadecimal number (one or two digits) after a backslash and an 'x' or 'X'. For example : \xFF.
///
/// The FSA only reaches END after reading one character past the end of the scanned
/// character (or after ReadEOF), so the internal buffer always carries one trailing
/// character that is not part of the result.
/// </summary>
public sealed class FSAChar : FSA {
    private enum State {
        START,  // nothing read yet
        END,    // a complete character plus one trailing character has been read
        ERROR,  // the input is not a valid character
        S,      // read a backslash
        C,      // read a complete normal or simple-escape character
        SO,     // read backslash + 1 octal digit
        SOO,    // read backslash + 2 octal digits
        SOOO,   // read backslash + 3 octal digits
        SX,     // read backslash + 'x'/'X'
        SXH,    // read backslash + 'x'/'X' + 1 hex digit
        SXHH    // read backslash + 'x'/'X' + 2 hex digits
    }

    private State _state;

    // Every character passed to ReadChar, plus the placeholder added by ReadEOF.
    private readonly StringBuilder _scanned;

    // quote : Char
    // ============
    // \' in a Char, and \" in a String.
    private readonly Char _quote;

    /// <summary>
    /// Creates a scanner for one character.
    /// </summary>
    /// <param name="quote">The quote character that terminates the enclosing literal and is therefore not a normal character.</param>
    public FSAChar(Char quote) {
        this._state = State.START;
        this._quote = quote;
        this._scanned = new StringBuilder();
    }

    /// <summary>
    /// Discards everything scanned so far and returns to the start state.
    /// </summary>
    public override void Reset() {
        this._scanned.Clear();
        this._state = State.START;
    }

    /// <summary>
    /// Maps the internal state onto the generic FSA status:
    /// START is NONE, END is END, ERROR is ERROR, and every intermediate state is RUNNING.
    /// </summary>
    public override FSAStatus GetStatus() {
        switch (this._state) {
            case State.START:
                return FSAStatus.NONE;
            case State.END:
                return FSAStatus.END;
            case State.ERROR:
                return FSAStatus.ERROR;
            default:
                return FSAStatus.RUNNING;
        }
    }

    // IsChar : Char -> Boolean
    // ========================
    // the character is a 'normal' Char, other than <quote> \\ or \n
    //
    private Boolean IsChar(Char ch) {
        return ch != this._quote && ch != '\\' && ch != '\n';
    }

    // DigitValue : Char -> Int32
    // ==========================
    // Numeric value of a hexadecimal (and therefore also octal) digit, or -1 if ch is not one.
    //
    private static Int32 DigitValue(Char ch) {
        if (ch >= '0' && ch <= '9') {
            return ch - '0';
        }
        if (ch >= 'a' && ch <= 'f') {
            return ch - 'a' + 10;
        }
        if (ch >= 'A' && ch <= 'F') {
            return ch - 'A' + 10;
        }
        return -1;
    }

    // TryDecodeNumber : (Int32, Int32, out Char) -> Boolean
    // =====================================================
    // Decodes the digits in _scanned from index `first` up to (not including) the
    // trailing character, in the given radix. Fails without throwing when there are
    // no digits, more than three digits, or a character that is not a digit of that radix.
    //
    private Boolean TryDecodeNumber(Int32 first, Int32 radix, out Char value) {
        value = '\0';

        // The last buffered character is the trailing lookahead / EOF placeholder.
        Int32 end = this._scanned.Length - 1;
        Int32 count = end - first;
        if (count < 1 || count > 3) {
            return false;
        }

        Int32 acc = 0;
        for (Int32 i = first; i < end; ++i) {
            Int32 digit = DigitValue(this._scanned[i]);
            if (digit < 0 || digit >= radix) {
                return false;
            }
            acc = acc * radix + digit;
        }

        // At most three digits, so acc <= 0xFFF and always fits in a Char.
        value = (Char)acc;
        return true;
    }

    // RetrieveRaw : () -> String
    // ==========================
    // The scanned text without the trailing character.
    //
    public String RetrieveRaw() {
        return this._scanned.ToString(0, this._scanned.Length - 1);
    }

    // RetrieveChar : () -> Char
    // =========================
    // The value of the scanned character. Simple escapes are mapped to their control
    // characters, octal (\0, \12, \123) and hexadecimal (\x4, \x41) escapes are decoded
    // to the character with that code, and a normal character is returned as is.
    //
    public Char RetrieveChar() {
        Char decoded;

        // Three buffered characters: backslash + one character + trailing character.
        if (this._scanned.Length == 3) {
            switch (this._scanned[1]) {
                case 'a':
                    return '\a';
                case 'b':
                    return '\b';
                case 'f':
                    return '\f';
                case 'n':
                    return '\n';
                case 'r':
                    return '\r';
                case 't':
                    return '\t';
                case 'v':
                    return '\v';
                case '\'':
                    return '\'';
                case '\"':
                    return '\"';
                case '\\':
                    return '\\';
                case '?':
                    return '?';
                default:
                    // A single octal digit, e.g. \0, is a number and not the digit character.
                    if (this._scanned[0] == '\\' && TryDecodeNumber(1, 8, out decoded)) {
                        return decoded;
                    }
                    // Unknown escape: fall back to the unescaped character.
                    return this._scanned[1];
            }
        }

        // Longer buffers that start with a backslash are multi-digit numeric escapes.
        if (this._scanned.Length > 3 && this._scanned[0] == '\\') {
            Boolean isHex = this._scanned[1] == 'x' || this._scanned[1] == 'X';
            Boolean ok = isHex
                ? TryDecodeNumber(2, 16, out decoded)
                : TryDecodeNumber(1, 8, out decoded);
            if (ok) {
                return decoded;
            }
        }

        return this._scanned[0];
    }

    // RetrieveToken : () -> Token
    // ===========================
    // Note that this function never gets used, because FSAChar is just an inner FSA for other FSAs.
    //
    public override Token RetrieveToken() {
        return new EmptyToken();
    }

    // ReadChar : Char -> ()
    // =====================
    // Implementation of the FSA.
    // Every character is buffered, including the one that moves the FSA to END or ERROR.
    //
    public override void ReadChar(Char ch) {
        this._scanned.Append(ch);
        switch (this._state) {
            case State.END:
            case State.ERROR:
                // Reading past the end is an error.
                this._state = State.ERROR;
                break;
            case State.START:
                if (IsChar(ch)) {
                    this._state = State.C;
                } else if (ch == '\\') {
                    this._state = State.S;
                } else {
                    this._state = State.ERROR;
                }
                break;
            case State.C:
                // The character is complete; whatever follows is the trailing character.
                this._state = State.END;
                break;
            case State.S:
                if (Utils.IsEscapeChar(ch)) {
                    this._state = State.C;
                } else if (Utils.IsOctDigit(ch)) {
                    this._state = State.SO;
                } else if (ch == 'x' || ch == 'X') {
                    this._state = State.SX;
                } else {
                    this._state = State.ERROR;
                }
                break;
            case State.SX:
                // At least one hex digit is required after \x.
                if (Utils.IsHexDigit(ch)) {
                    this._state = State.SXH;
                } else {
                    this._state = State.ERROR;
                }
                break;
            case State.SXH:
                if (Utils.IsHexDigit(ch)) {
                    this._state = State.SXHH;
                } else {
                    this._state = State.END;
                }
                break;
            case State.SXHH:
                // At most two hex digits are consumed.
                this._state = State.END;
                break;
            case State.SO:
                if (Utils.IsOctDigit(ch)) {
                    this._state = State.SOO;
                } else {
                    this._state = State.END;
                }
                break;
            case State.SOO:
                if (Utils.IsOctDigit(ch)) {
                    this._state = State.SOOO;
                } else {
                    this._state = State.END;
                }
                break;
            case State.SOOO:
                // At most three octal digits are consumed.
                this._state = State.END;
                break;
            default:
                this._state = State.ERROR;
                break;
        }
    }

    // ReadEOF : () -> ()
    // ==================
    // End of input. A placeholder character is buffered so that the buffer has the same
    // "character + one trailing character" layout as after ReadChar.
    //
    public override void ReadEOF() {
        this._scanned.Append('0');
        switch (this._state) {
            case State.C:
            case State.SO:
            case State.SOO:
            case State.SOOO:
            case State.SXH:
            case State.SXHH:
                this._state = State.END;
                break;
            default:
                this._state = State.ERROR;
                break;
        }
    }

}
|
|
|
|
/// <summary>
/// FSACharConst
/// ============
/// The FSA for scanning a C character constant.
/// Upon finish, we can retrieve a character token.
///
/// A character constant can either be represented by
/// * '[char]'
/// or
/// * L'[char]'
///
/// The character inside the quotes is read by FSAChar.
/// Note that if the inner character is a single quote, it needs to be escaped.
/// </summary>
public sealed class FSACharConst : FSA {
    private enum State {
        START,  // nothing read yet
        END,    // one character past the closing quote has been read
        ERROR,  // the input is not a character constant
        L,      // read the 'L' prefix
        Q,      // read the opening quote; the inner FSA is consuming the character
        QC,     // the inner character is complete; expecting the closing quote
        QCQ     // read the closing quote
    }

    private State _state;
    private Char _val;
    private String _raw;
    private readonly FSAChar _fsachar;

    /// <summary>
    /// Creates the scanner together with its inner character FSA, which uses the single quote as delimiter.
    /// </summary>
    public FSACharConst() {
        this._fsachar = new FSAChar('\'');
        this._state = State.START;
    }

    /// <summary>
    /// Returns this FSA and the inner character FSA to their start states.
    /// </summary>
    public override void Reset() {
        this._state = State.START;
        this._fsachar.Reset();
    }

    /// <summary>
    /// Maps the internal state onto the generic FSA status:
    /// START is NONE, END is END, ERROR is ERROR, and every intermediate state is RUNNING.
    /// </summary>
    public override FSAStatus GetStatus() {
        switch (this._state) {
            case State.START:
                return FSAStatus.NONE;
            case State.END:
                return FSAStatus.END;
            case State.ERROR:
                return FSAStatus.ERROR;
            default:
                return FSAStatus.RUNNING;
        }
    }

    /// <summary>
    /// Builds the token from the raw text and value captured from the inner FSA.
    /// </summary>
    public override Token RetrieveToken() => new TokenCharConst(this._raw, this._val);

    /// <summary>
    /// Advances the FSA by one input character.
    /// </summary>
    public override void ReadChar(Char ch) {
        switch (this._state) {
            case State.START:
                if (ch == 'L') {
                    this._state = State.L;
                } else if (ch == '\'') {
                    EnterQuote();
                } else {
                    this._state = State.ERROR;
                }
                break;

            case State.L:
                if (ch == '\'') {
                    EnterQuote();
                } else {
                    this._state = State.ERROR;
                }
                break;

            case State.Q:
                StepInnerChar(ch);
                break;

            case State.QC:
                this._state = (ch == '\'') ? State.QCQ : State.ERROR;
                break;

            case State.QCQ:
                this._state = State.END;
                break;

            default:
                // END, ERROR: any further input is an error.
                this._state = State.ERROR;
                break;
        }
    }

    // The opening quote was read: start scanning the inner character from scratch.
    private void EnterQuote() {
        this._state = State.Q;
        this._fsachar.Reset();
    }

    // Feeds one character to the inner FSA while inside the quotes.
    private void StepInnerChar(Char ch) {
        this._fsachar.ReadChar(ch);
        FSAStatus inner = this._fsachar.GetStatus();

        if (inner == FSAStatus.ERROR) {
            this._state = State.ERROR;
            return;
        }
        if (inner != FSAStatus.END) {
            // The inner character is not complete yet.
            return;
        }

        // The inner FSA finishes only after seeing the character that follows the
        // scanned character, so capture the result and then process ch again here
        // (it should be the closing quote).
        this._state = State.QC;
        this._raw = this._fsachar.RetrieveRaw();
        this._val = this._fsachar.RetrieveChar();
        this._fsachar.Reset();
        ReadChar(ch);
    }

    /// <summary>
    /// End of input: the constant is complete only if the closing quote has just been read.
    /// </summary>
    public override void ReadEOF() {
        this._state = (this._state == State.QCQ) ? State.END : State.ERROR;
    }

}
|
|
} |