mirror of
https://github.com/chrim05/nds-console.git
synced 2025-06-19 22:25:40 -04:00
272 lines
7.5 KiB
C++
272 lines
7.5 KiB
C++
#include "nscript.h"
|
|
|
|
std::string NScript::Node::toString()
|
|
{
|
|
std::string temp;
|
|
|
|
switch (kind)
|
|
{
|
|
case NodeKind::ConstNum: return cutTrailingZeros(std::to_string(value.num));
|
|
case NodeKind::ConstString: return "'" + Parser::escapedToEscapes(value.str) + "'";
|
|
case NodeKind::Bin: return value.bin->left.toString() + " " + value.bin->op.toString() + " " + value.bin->right.toString();
|
|
case NodeKind::Una: return value.una->op.toString() + value.una->term.toString();
|
|
|
|
case NodeKind::Call:
|
|
for (uint64_t i = 0; i < value.call->args.size(); i++)
|
|
{
|
|
// when this is not the first arg
|
|
if (i > 0)
|
|
temp.append(", ");
|
|
|
|
temp.append(value.call->args[i].toString());
|
|
}
|
|
|
|
return value.call->name.toString() + "(" + temp + ")";
|
|
|
|
case NodeKind::Plus:
|
|
case NodeKind::Minus:
|
|
case NodeKind::Star:
|
|
case NodeKind::Slash:
|
|
case NodeKind::LPar:
|
|
case NodeKind::RPar:
|
|
case NodeKind::Comma:
|
|
case NodeKind::Bad:
|
|
case NodeKind::Identifier: return value.str;
|
|
case NodeKind::Eof: return "<eof>";
|
|
}
|
|
|
|
panic("unimplemented Node::toString() for some NodeKind");
|
|
return nullptr;
|
|
}
|
|
|
|
// Produces the next token of the expression, starting at `exprIndex`.
// Whitespace is skipped first. At end of input an Eof node is returned without
// moving the cursor; in every other case the cursor is advanced by one after
// the token has been built (the collectors leave it on the token's last char).
NScript::Node NScript::Parser::nextToken()
{
  // whitespace has no meaning, skip it
  eatWhitespaces();

  if (eof())
    return Node(NodeKind::Eof, curPos());

  auto ch = curChar();

  // fallback node, kept as-is when the character matches no token category
  auto token = Node(curPos());

  if (isIdentifierChar(ch, true))
  {
    token = convertToKeywordWhenPossible(collectIdentifierToken());
  }
  else if (isNumChar(ch, true))
  {
    token = collectNumToken();
  }
  else if (ch == '\'')
  {
    token = collectStringToken();
  }
  else if (arrayContains({'+', '-', '*', '/', '(', ')', ','}, ch))
  {
    // single-char punctuation: the node kind is the character code itself,
    // and the node text is a freshly allocated one-char C string
    auto symbol = std::string(1, ch);

    token = Node(NodeKind(ch), (NodeValue) { .str = cstringRealloc(symbol.c_str()) }, curPos());
  }

  // stepping past the last char of the token
  exprIndex++;

  return token;
}
|
|
|
|
// Collects a single-quoted string literal; on entry the cursor is on the opening `'`.
// On success the cursor is left on the closing `'` and a ConstString node is
// returned whose text is the body with escape sequences processed by
// escapesToEscaped. The node position runs from the opening quote to one past
// the closing quote.
// Throws ParserError("unclosed string") when the input ends before a closing quote.
NScript::Node NScript::Parser::collectStringToken()
{
  // eating first `'`
  exprIndex++;

  auto startPos = exprIndex - 1;
  auto seq = collectSequence([this] {
    // any character except `'` belongs to the string
    if (curChar() != '\'')
      return true;

    // a `'` belongs to the string only when it is escaped, i.e. preceded by an
    // odd number of backslashes (an even run is made of escaped backslashes only).
    // the backward scan always stops at the opening `'` at the latest.
    int backslashes = 0;

    while (curChar(-(backslashes + 1)) == '\\')
      backslashes++;

    return backslashes % 2 == 1;
  });

  // collectSequence leaves the cursor on the last char of the body,
  // so +2 is one past the closing `'`
  auto pos = Position(startPos, exprIndex + 2);

  // moving to the last `'`
  exprIndex++;

  // no closing quote was found before the end of the input
  if (eof())
    throw ParserError({"unclosed string"}, Position(startPos, exprIndex));

  return Node(NodeKind::ConstString, (NodeValue) { .str = cstringRealloc(escapesToEscaped(seq, pos).c_str()) }, pos);
}
|
|
|
|
// Collects a numeric literal starting at the cursor and returns it as a ConstNum node.
// The cursor is left on the last char of the literal.
// Throws ParserError when the literal has more than one dot, ends with a dot,
// or is immediately followed by an identifier character.
NScript::Node NScript::Parser::collectNumToken()
{
  auto firstIndex = exprIndex;
  auto digits = collectSequence([this] { return isNumChar(curChar(), false); });
  auto span = Position(firstIndex, exprIndex + 1);

  // inconsistent numbers like 0.0.1 or 1.2.3 etc
  if (countOccurrences(digits, '.') > 1)
  {
    throw ParserError({"number cannot include more than one dot"}, span);
  }

  // the user wrote something like 0. or 2. etc
  if (digits.back() == '.')
  {
    auto withoutDot = digits.substr(0, digits.length() - 1);

    throw ParserError({"number cannot end with a dot (correction: `", withoutDot, "`)"}, span);
  }

  // an identifier char right after the number: the user wrote something like 123hello or 123_
  if (!eof(+1) && isIdentifierChar(curChar(+1), false))
  {
    auto offending = std::string(1, curChar(+1));

    throw ParserError(
      {"number cannot include part of identifier (correction: `", digits, " ", offending, "...`)"},
      Position(span.startPos, curPos(+1).endPos)
    );
  }

  auto parsed = (NodeValue) { .num = atof(digits.c_str()) };

  return Node(NodeKind::ConstNum, parsed, span);
}
|
|
|
|
// Hook for turning identifier tokens into keyword tokens.
// Currently a pass-through: the token is returned unchanged.
// The keyword mapping below is disabled and kept for reference only:
//
//   if (token.kind != NodeKind::Identifier)
//     return token;
//
//   if (token.value.str == std::string("true"))
//     token.kind = NodeKind::True;
//   else if (token.value.str == std::string("false"))
//     token.kind = NodeKind::False;
//
//   return token;
NScript::Node NScript::Parser::convertToKeywordWhenPossible(Node token)
{
  return token;
}
|
|
|
|
// Collects an identifier starting at the cursor and returns it as an Identifier node.
// The cursor is left on the last char of the identifier.
NScript::Node NScript::Parser::collectIdentifierToken()
{
  auto firstIndex = exprIndex;
  auto name = collectSequence([this] { return isIdentifierChar(curChar(), false); });

  // the text is copied through cstringRealloc so the node does not keep the
  // pointer returned by c_str(), which is only safe to use while the
  // std::string is alive, see
  // https://codeql.github.com/codeql-query-help/cpp/cpp-return-c-str-of-std-string/
  auto payload = (NodeValue) { .str = cstringRealloc(name.c_str()) };

  return Node(NodeKind::Identifier, payload, Position(firstIndex, exprIndex + 1));
}
|
|
|
|
std::string NScript::Parser::collectSequence(std::function<bool()> checker)
|
|
{
|
|
auto r = std::string();
|
|
|
|
// as long as it matches a certain character, adds the latter to the string
|
|
while (!eof() && checker())
|
|
{
|
|
r.push_back(curChar());
|
|
exprIndex++;
|
|
}
|
|
|
|
// going back to the last char of sequence
|
|
exprIndex--;
|
|
|
|
return r;
|
|
}
|
|
|
|
// Parses `operand (operator operand)*`, where operands come from `expector`
// and an operator is any token whose kind is listed in `operators`.
// Each operator found folds the result so far into a new Bin node as its left
// side. When no operator follows the first operand, that operand is returned as-is.
NScript::Node NScript::Parser::expectBinaryOrTerm(std::function<Node()> expector, std::vector<NodeKind> operators)
{
  auto acc = expector();

  for (;;)
  {
    // done as soon as the current token is not one of the requested operators
    if (eofToken() || !arrayContains(operators, curToken.kind))
      return acc;

    auto op = getCurAndAdvance();
    auto rhs = expector();
    auto span = Position(acc.pos.startPos, rhs.pos.endPos);

    // the expression built so far becomes the left operand of the new node
    acc = Node(NodeKind::Bin, (NodeValue) { .bin = new BinNode(acc, rhs, op) }, span);
  }
}
|
|
|
|
// Parses a term: an identifier, a number, a string, a unary `+`/`-` applied to
// a term, or a parenthesized expression. If the term is directly followed by
// `(`, it is then turned into a call through collectCallNode.
// Throws ParserError("unexpected token ...") for any other leading token.
NScript::Node NScript::Parser::expectTerm()
{
  Node term;

  switch (getCurAndAdvance().kind)
  {
    // simple token
    case NodeKind::Identifier:
    case NodeKind::ConstNum:
    case NodeKind::ConstString:
    {
      term = prevToken;
      break;
    }

    // unary expression = +|- term
    case NodeKind::Plus:
    case NodeKind::Minus:
    {
      // the sign is saved before recursing, because parsing the operand advances the tokens
      Node sign = prevToken;
      Node operand = expectTerm();

      term = Node(NodeKind::Una, (NodeValue) { .una = new UnaNode(operand, sign) }, Position(sign.pos.startPos, operand.pos.endPos));
      break;
    }

    // parenthesized expression = ( expression )
    case NodeKind::LPar:
    {
      term = expectExpression();
      expectTokenAndAdvance(NodeKind::RPar);
      break;
    }

    default:
      throw ParserError({"unexpected token (found `", prevToken.toString(), "`)"}, prevToken.pos);
  }

  // a `(` right after the term starts a call parameters list
  if (curToken.kind == NodeKind::LPar)
    term = collectCallNode(term);

  return term;
}
|
|
|
|
// Parses a call parameters list `( expr, expr, ... )` for the callee `name`;
// on entry the current token is the opening `(`.
// Returns a Call node spanning from the start of `name` to the closing `)`.
// Throws ParserError when `name` is neither an identifier nor a string, or when
// the tokens end before the closing `)`.
NScript::Node NScript::Parser::collectCallNode(Node name)
{
  if (!(name.kind == NodeKind::Identifier || name.kind == NodeKind::ConstString))
    throw ParserError({"expected string or identifier call name"}, name.pos);

  auto openParenStart = curToken.pos.startPos;
  std::vector<Node> args;

  // eating first `(`
  advance();

  // one expression per iteration, until the closing `)` or the end of the tokens
  while (!eofToken() && curToken.kind != NodeKind::RPar)
  {
    // every arg but the first one must be preceded by a comma
    if (!args.empty())
      expectTokenAndAdvance(NodeKind::Comma);

    args.push_back(expectExpression());
  }

  if (eofToken())
    throw ParserError({"unclosed call parameters list"}, Position(openParenStart, prevToken.pos.endPos));

  // eating last `)`
  advance();

  return Node(NodeKind::Call, (NodeValue) { .call = new CallNode(name, args) }, Position(name.pos.startPos, prevToken.pos.endPos));
}
|
|
|
|
std::string NScript::Parser::escapesToEscaped(std::string s, Position pos)
|
|
{
|
|
std::string t;
|
|
|
|
for (uint64_t i = 0; i < s.length(); i++)
|
|
if (s[i] == '\\')
|
|
{
|
|
t.push_back(escapeChar(s[i + 1], Position(pos.startPos + i, pos.startPos + i + 1)));
|
|
|
|
// skipping the escape code
|
|
i++;
|
|
}
|
|
else
|
|
t.push_back(s[i]);
|
|
|
|
return t;
|
|
} |