// nds-console/source/nscript.cpp

#include "nscript.h"

// Renders this node back into a textual form.
//
// Numeric and string constants, binary/unary expressions and calls are rebuilt from their operands.
// Operator/punctuation tokens, identifiers and bad tokens return the text stored in `value.str`.
// When `kind` is not handled by the switch, `panic` is called.
std::string NScript::Node::toString()
{
  switch (kind)
  {
    case NodeKind::ConstNum:    return cutTrailingZeros(std::to_string(value.num));
    case NodeKind::ConstString: return "'" + Parser::escapedToEscapes(value.str) + "'";
    case NodeKind::Bin:         return value.bin->left.toString() + " " + value.bin->op.toString() + " " + value.bin->right.toString();
    case NodeKind::Una:         return value.una->op.toString() + value.una->term.toString();

    case NodeKind::Call:
    {
      // comma separated list of the rendered arguments
      std::string renderedArgs;

      for (std::size_t i = 0; i < value.call->args.size(); i++)
      {
        // when this is not the first arg
        if (i > 0)
          renderedArgs.append(", ");

        renderedArgs.append(value.call->args[i].toString());
      }

      return value.call->name.toString() + "(" + renderedArgs + ")";
    }

    case NodeKind::Plus:
    case NodeKind::Minus:
    case NodeKind::Star:
    case NodeKind::Slash:
    case NodeKind::LPar:
    case NodeKind::RPar:
    case NodeKind::Comma:
    case NodeKind::Bad:
    case NodeKind::Identifier:  return value.str;
    case NodeKind::Eof:         return "<eof>";
  }

  panic("unimplemented Node::toString() for some NodeKind");

  // only reached if `panic` returns.
  // an empty string is returned instead of `nullptr`: building a std::string from a null pointer is undefined behavior.
  return std::string();
}

// Produces the next token of the expression.
//
// Leading whitespaces are skipped first; at the end of the input an `Eof` node is returned.
// Otherwise the current character selects which collector runs (identifier, number, string
// or single-character symbol). A character matching none of them leaves the position-only
// node built beforehand untouched. In every non-eof case `exprIndex` is advanced by one
// before returning.
NScript::Node NScript::Parser::nextToken()
{
  // whitespaces carry no meaning, skip them
  eatWhitespaces();

  if (eof())
    return Node(NodeKind::Eof, curPos());

  auto current = curChar();
  Node token(curPos());

  if (isIdentifierChar(current, true))
  {
    token = convertToKeywordWhenPossible(collectIdentifierToken());
  }
  else if (isNumChar(current, true))
  {
    token = collectNumToken();
  }
  else if (current == '\'')
  {
    token = collectStringToken();
  }
  else if (arrayContains({'+', '-', '*', '/', '(', ')', ','}, current))
  {
    // the symbol's character code is used directly as its NodeKind
    const std::string symbol(1, current);

    token = Node(static_cast<NodeKind>(current), (NodeValue) { .str = cstringRealloc(symbol.c_str()) }, curPos());
  }

  // stepping over the last character of the token
  ++exprIndex;
  return token;
}

// Collects a single-quoted string literal.
//
// On entry `exprIndex` is on the opening `'`; on a successful return it is on the closing `'`.
// The returned `ConstString` node holds the content with its escape sequences already
// converted by `escapesToEscaped`, and its position spans both quotes.
// Throws `ParserError` ("unclosed string") when the input ends before a closing quote.
NScript::Node NScript::Parser::collectStringToken()
{
  // eating first `'`
  exprIndex++;

  auto startPos = exprIndex - 1;
  auto seq      = collectSequence([this] {
    // any character except `'` belongs to the string
    if (curChar() != '\'')
      return true;

    // a `'` belongs to the string only when it is escaped, which means it is preceded by an odd number of backslashes:
    // `\'` is an escaped quote, `\\'` is an escaped backslash followed by the closing quote,
    // `\\\'` is an escaped backslash followed by an escaped quote, and so on.
    // looking only two characters back would wrongly close the string on `\\\'`.
    // the backwards walk stops at the opening `'` at the latest, so it never leaves the literal.
    int backslashes = 0;

    while (curChar(-(backslashes + 1)) == '\\')
      backslashes++;

    return backslashes % 2 == 1;
  });

  // exprIndex is on the last char of the content: +1 is the closing `'`, +2 is one past it
  auto pos = Position(startPos, exprIndex + 2);

  // eating the last char of string
  // moving to the last `'`
  exprIndex++;

  if (eof())
    throw ParserError({"unclosed string"}, Position(startPos, exprIndex));

  return Node(NodeKind::ConstString, (NodeValue) { .str = cstringRealloc(escapesToEscaped(seq, pos).c_str()) }, pos);
}

// Collects a numeric literal starting at the current character.
//
// The literal is the longest run of characters accepted by `isNumChar(..., false)`.
// On return `exprIndex` is on the last character of that run.
// Throws `ParserError` when the literal contains more than one dot, ends with a dot,
// or is immediately followed by an identifier character.
NScript::Node NScript::Parser::collectNumToken()
{
  const auto firstIndex = exprIndex;
  const std::string digits = collectSequence([this] { return isNumChar(curChar(), false); });
  const Position span(firstIndex, exprIndex + 1);

  // malformed numbers such as 0.0.1 or 1.2.3
  if (countOccurrences(digits, '.') > 1)
    throw ParserError({"number cannot include more than one dot"}, span);

  // numbers with a dangling dot such as 0. or 2.
  if (digits.back() == '.')
  {
    throw ParserError(
      {"number cannot end with a dot (correction: `", digits.substr(0, digits.length() - 1), "`)"},
      span
    );
  }

  // a number glued to an identifier such as 123hello or 123_
  const bool followedByIdentifier = !eof(+1) && isIdentifierChar(curChar(+1), false);

  if (followedByIdentifier)
  {
    throw ParserError(
      {"number cannot include part of identifier (correction: `", digits, " ", std::string(1, curChar(+1)), "...`)"},
      Position(span.startPos, curPos(+1).endPos)
    );
  }

  return Node(NodeKind::ConstNum, (NodeValue) { .num = atof(digits.c_str()) }, span);
}

// Hook for turning an identifier token into a keyword token.
//
// Currently a no-op: the token is returned unchanged.
// The disabled code below shows the intended shape of the conversion
// (mapping the identifiers `true` and `false` to dedicated node kinds).
NScript::Node NScript::Parser::convertToKeywordWhenPossible(Node token)
{
  return token;

  // disabled keyword mapping, kept for reference:

  // if (token.kind != NodeKind::Identifier)
  //  return token;

  // if (token.value.str == std::string("true"))
  //   token.kind = NodeKind::True;
  // else if (token.value.str == std::string("false"))
  //   token.kind = NodeKind::False;

  // return token;
}

// Collects an identifier starting at the current character.
//
// The identifier is the longest run of characters accepted by `isIdentifierChar(..., false)`.
// On return `exprIndex` is on the last character of that run.
NScript::Node NScript::Parser::collectIdentifierToken()
{
  const auto firstIndex = exprIndex;
  const std::string name = collectSequence([this] { return isIdentifierChar(curChar(), false); });

  // the buffer behind `name.c_str()` is released when `name` goes out of scope,
  // so the node receives the pointer produced by cstringRealloc rather than the std::string's own one.
  // see https://codeql.github.com/codeql-query-help/cpp/cpp-return-c-str-of-std-string/
  return Node(
    NodeKind::Identifier,
    (NodeValue) { .str = cstringRealloc(name.c_str()) },
    Position(firstIndex, exprIndex + 1)
  );
}

// Collects consecutive characters for as long as `checker` returns true and the input is not finished.
//
// `checker` is expected to inspect the parser's current character itself.
// On return `exprIndex` is one position before the first character that was not collected,
// that is on the last character of the sequence when the sequence is not empty.
std::string NScript::Parser::collectSequence(std::function<bool()> checker)
{
  std::string collected;

  // every accepted character is appended, then the cursor moves forward
  for (; !eof() && checker(); ++exprIndex)
    collected.push_back(curChar());

  // stepping back onto the last char of the sequence
  --exprIndex;

  return collected;
}

// Parses either a single operand or a left-associative chain of binary expressions.
//
// `expector` parses one operand; `operators` lists the token kinds accepted as binary operators.
// Each time the current token is one of those operators, the node collected so far becomes
// the left side of a new `Bin` node whose right side is the next operand.
NScript::Node NScript::Parser::expectBinaryOrTerm(std::function<Node()> expector, std::vector<NodeKind> operators)
{
  Node result = expector();

  for (;;)
  {
    // stop at the end of the input or at the first token that is not one of the requested operators
    if (eofToken() || !arrayContains(operators, curToken.kind))
      break;

    auto op  = getCurAndAdvance();
    auto rhs = expector();

    // the new node spans from the start of the left operand to the end of the right one
    const Position span(result.pos.startPos, rhs.pos.endPos);

    result = Node(NodeKind::Bin, (NodeValue) { .bin = new BinNode(result, rhs, op) }, span);
  }

  return result;
}

// Parses a term: a simple token, a unary expression, or a parenthesized expression.
//
// The current token is consumed and decides which form is parsed.
// When the term is directly followed by `(`, it is used as the name of a call node.
// Throws `ParserError` ("unexpected token") when the consumed token cannot start a term.
NScript::Node NScript::Parser::expectTerm()
{
  Node result;

  switch (getCurAndAdvance().kind)
  {
    // identifier or constant: the consumed token is the term itself
    case NodeKind::Identifier:
    case NodeKind::ConstNum:
    case NodeKind::ConstString:
    {
      result = prevToken;
      break;
    }

    // unary expression = +|- term
    case NodeKind::Plus:
    case NodeKind::Minus:
    {
      // the sign has to be saved before parsing the operand, which moves prevToken forward
      Node sign    = prevToken;
      Node operand = expectTerm();

      result = Node(NodeKind::Una, (NodeValue) { .una = new UnaNode(operand, sign) }, Position(sign.pos.startPos, operand.pos.endPos));
      break;
    }

    // parenthesized expression = ( expression )
    case NodeKind::LPar:
    {
      result = expectExpression();
      expectTokenAndAdvance(NodeKind::RPar);
      break;
    }

    default:
      throw ParserError({"unexpected token (found `", prevToken.toString(), "`)"}, prevToken.pos);
  }

  // no opening parenthesis right after the term: it is not a call
  if (curToken.kind != NodeKind::LPar)
    return result;

  return collectCallNode(result);
}

// Parses the argument list of a call whose name has already been parsed.
//
// On entry the current token is the opening `(`. Arguments are expressions separated by commas.
// The returned `Call` node spans from the start of `name` to the end of the closing `)`.
// Throws `ParserError` when `name` is neither an identifier nor a string constant,
// or when the input ends before the closing `)`.
NScript::Node NScript::Parser::collectCallNode(Node name)
{
  const bool validName = name.kind == NodeKind::Identifier || name.kind == NodeKind::ConstString;

  if (!validName)
    throw ParserError({"expected string or identifier call name"}, name.pos);

  // position of the opening `(`, used when reporting an unclosed list
  auto listStart = curToken.pos.startPos;
  std::vector<Node> args;

  // eating first `(`
  advance();

  for (;;)
  {
    if (eofToken())
      throw ParserError({"unclosed call parameters list"}, Position(listStart, prevToken.pos.endPos));

    if (curToken.kind != NodeKind::RPar)
    {
      // every arg after the first one has to be preceded by a comma
      if (!args.empty())
        expectTokenAndAdvance(NodeKind::Comma);

      args.push_back(expectExpression());
      continue;
    }

    // eating last `)`
    advance();

    return Node(NodeKind::Call, (NodeValue) { .call = new CallNode(name, args) }, Position(name.pos.startPos, prevToken.pos.endPos));
  }
}

// Converts the escape sequences of `s` into the characters they stand for.
//
// Every backslash and the character following it are replaced by the result of `escapeChar`;
// all the other characters are copied as they are.
// `pos` is the position `s` was taken from; it is used to build the position handed to `escapeChar`.
std::string NScript::Parser::escapesToEscaped(std::string s, Position pos)
{
  std::string converted;
  uint64_t    i = 0;

  while (i < s.length())
  {
    const char current = s[i];

    // plain character: copied unchanged
    if (current != '\\')
    {
      converted.push_back(current);
      i++;
      continue;
    }

    // escape sequence: the char after the backslash is the escape code
    converted.push_back(escapeChar(s[i + 1], Position(pos.startPos + i, pos.startPos + i + 1)));

    // skipping both the backslash and the escape code
    i += 2;
  }

  return converted;
}