// IMPORTANT NOTE: any changes to this file should be applied manually in Data Blaze, to:
// - backend/src/baserow/contrib/database/api/query/lexer_utils.py
// - backend/src/baserow/contrib/database/api/query/sql_lexer.py
// - backend/src/baserow/contrib/database/api/query/lexer_utils.py

/* BEGIN_REMOVE_IN_PYTHON */
import { processCommandInAttribute } from './Parser';
import { Environment } from './DataContainer'; // eslint-disable-line no-unused-vars


// Matches, at the very start of a string, a brace-wrapped name made only of
// ASCII letters and hyphens.
const SIMPLE_COMMAND_REGEX = /^\{[A-Za-z-]+\}/; // e.g. {foo}
/* END_REMOVE_IN_PYTHON */

// Matches the run of whitespace (if any) at the very start of a string.
const START_WS_REGEX = /^\s+/;

/**
 * Maps the source text of each operator / punctuation token to its token type.
 *
 * The keys are inserted into the lexer tries with kind 'basic'. The lexers
 * keep the longest key that matches at the current position, so e.g. `===`
 * is preferred over `==`, which is preferred over `=`.
 */
export const BASIC_TOKENS = {
  '+': 'PLUS',
  '->': 'CALLS',
  '-': 'MINUS',
  '/': 'DIVIDE',
  '*': 'MULTIPLY',
  '^': 'POWER',
  '(': 'LPAREN',
  ')': 'RPAREN',
  '.': 'PERIOD',
  '[': 'LBRACKET',
  ']': 'RBRACKET',
  '===': 'STRONG_EQUALITY',
  '==': 'EQUALITY',
  '=': 'EQUALS',
  '<>': 'NOT_EQUALITY',
  ':': 'LISTASSIGNS',
  '>=': 'GTE',
  '<=': 'LTE',
  '<': 'LT',
  '>': 'GT',
  '&': 'AMP',
  ',': 'COMMA'
};

/**
 * Reserved words, inserted into the lexer tries with kind 'keyword'.
 *
 * They match in any letter case, and only when the character that follows is
 * not a letter, digit or underscore. `YES` and `NO` are emitted as BOOLEAN
 * tokens; every other keyword is emitted with its upper-cased text as the
 * token type.
 */
export const KEYWORD_TOKENS = [
  'YES', 'NO', 'NOT', 'AND', 'OR', 'IF', 'ELSE', 'FOR', 'IN', 'ENDFOR', 'RETURN', 'ENDIF', 'ELSEIF', 'BLOCK', 'ENDBLOCK', 'TRY', 'CATCHERROR', 'ENDTRY', 'VAR'
];

/**
 * @typedef {'basic'|'keyword'|'sql'} TrieNodeKind
 **/

/**
 * @typedef {{leaf?: boolean, kind?: TrieNodeKind, trie: {[char: string] : TrieNode}}} TrieNode
 **/

/**
 * @typedef {{ tokens: string[], kind: TrieNodeKind}} TokensWithKind
 **/

/**
 * Builds a case-insensitive prefix trie from several groups of tokens.
 *
 * Each token is inserted using its upper-case spelling, and every newly
 * created node is also registered under the lower-case spelling of the same
 * character, so upper-case, lower-case and mixed-case input all walk the same
 * nodes. The node reached by a token's last character is flagged `leaf` and
 * tagged with the `kind` of the group the token came from; if the same token
 * appears in several groups, the last group processed wins.
 *
 * @param {TokensWithKind[]} tokensWithKind - token groups to insert
 * @returns {TrieNode} the root node of the trie
 **/
function tokensToTrie(tokensWithKind) {
  /** @type {TrieNode} **/
  const root = { trie: {} };
  for (let groupIndex = 0; groupIndex < tokensWithKind.length; groupIndex++) {
    const group = tokensWithKind[groupIndex];
    for (let tokenIndex = 0; tokenIndex < group.tokens.length; tokenIndex++) {
      const tokenUpper = group.tokens[tokenIndex].toUpperCase();
      const tokenLower = group.tokens[tokenIndex].toLowerCase();
      let node = root;
      // A dedicated index name is used here: the previous implementation
      // reused `j` and shadowed the enclosing loop's counter.
      for (let charIndex = 0; charIndex < tokenUpper.length; charIndex++) {
        const upperChar = tokenUpper[charIndex];
        if (!node.trie[upperChar]) {
          // Upper- and lower-case keys share one child node.
          const child = { leaf: false, trie: {} };
          node.trie[upperChar] = child;
          node.trie[tokenLower[charIndex]] = child;
        }
        node = node.trie[upperChar];
      }
      node.leaf = true;
      node.kind = group.kind;
    }
  }

  return root;
}

/* BEGIN_REMOVE_IN_PYTHON */
// Trie used by lex(): the operator / punctuation tokens (kind 'basic')
// together with the reserved words (kind 'keyword').
const LEX_TRIE = tokensToTrie([
  {
    tokens: Object.keys(BASIC_TOKENS),
    kind: 'basic',
  },
  {
    tokens: KEYWORD_TOKENS,
    kind: 'keyword',
  },
]);

/**
 * For when we want to be able to parse an invalid command.
 *
 * Note that this cannot be fully accurate as we don't know which attributes
 * are equations and which aren't.
 *
 * A simple `{name}` command is returned as-is. Otherwise the text is scanned
 * from its second character (the first is assumed to be the opening `{`),
 * skipping backslash-escaped characters and tracking nested braces, until the
 * `}` that balances the opening brace is found.
 *
 * @param {string} str - str possibly starting with an invalid command
 *
 * @return {string|null} the command text the string starts with, or `null`
 *   when no balancing `}` is found (including when the string ends in a
 *   dangling backslash)
 */
function parseBrokenCommand(str) {
  // Run the regex once and reuse the match instead of test() followed by match().
  const simpleMatch = str.match(SIMPLE_COMMAND_REGEX);
  if (simpleMatch) {
    return simpleMatch[0];
  }

  let nodeDepth = 0;
  let position = 1;
  while (position < str.length) {
    const char = str[position];
    position++;
    if (char === '\\') {
      if (position === str.length) {
        // Nothing left to escape, so the command can never be closed.
        return null;
      }
      // Skip the escaped character so an escaped brace is not counted.
      position++;
    } else if (char === '{') {
      nodeDepth++;
    } else if (char === '}') {
      if (nodeDepth === 0) {
        return str.slice(0, position);
      }
      nodeDepth--;
    }
  }
  return null;
}

/**
 * @typedef {object} TokenType
 * @property {string} type
 * @property {number} position
 * @property {string} source
 * @property {string=} identifier
 * @property {string=} string
 * @property {number=} number
 * @property {boolean=} boolean
 * @property {string=} command
 * @property {boolean=} isKeyword
 * @property {boolean=} isSQLKeyword
 */

/**
 * @typedef {object} LexResult
 * @property {string} termination
 * @property {number} position
 * @property {TokenType[]} tokens
 */


/**
 * Expands the iterator shorthand allowed in {repeat}:
 *   `for x in [1,2,3]`
 *
 * into the complete list-comprehension form:
 *   `[1 for x in [1,2,3]]`
 *
 * Nothing happens unless the first token is a FOR token. All inserted tokens
 * are synthetic: position 0 and an empty source.
 *
 * @param {TokenType[]} tokens - list of tokens, is mutated.
 */
export function pushRepeatTokens(tokens) {
  const first = tokens[0];
  if (!first || first.type !== 'FOR') {
    return;
  }

  // Prefix `[1 ` in front of the existing tokens ...
  tokens.unshift(
    { position: 0, type: 'LBRACKET', source: '' },
    { position: 0, type: 'NUMBER', number: 1, source: '' },
    { position: 0, type: 'WS', source: '' }
  );
  // ... and close the bracket after them.
  tokens.push({ position: 0, type: 'RBRACKET', source: '' });
}

/**
 * Surrounds the tokens of a {run} code block with `block` ... `endblock`,
 * each separated from the existing tokens by a line-break whitespace token.
 * Every inserted token carries the position -100.
 *
 * @param {TokenType[]} tokens - list of tokens, is mutated.
 */
export function pushBlockTokens(tokens) {
  const prefix = [
    { type: 'BLOCK', isKeyword: true, source: 'block', position: -100 },
    { type: 'WS_B', source: '\n', position: -100 },
  ];
  const suffix = [
    { type: 'WS_B', source: '\n', position: -100 },
    { type: 'ENDBLOCK', isKeyword: true, source: 'endblock', position: -100 },
  ];

  tokens.splice(0, 0, ...prefix);
  tokens.splice(tokens.length, 0, ...suffix);
}


/**
 * Tokenizes an equation string. The string may have trailing contents. E.g:
 *   ` 1 + 1; trim=yes} foo`
 *
 * Scanning stops, and `termination` reports why, when:
 *   - `SINGLE_TOKEN`: `options.singleToken` is set, a token has been emitted
 *     and input remains;
 *   - `END_ATTRIBUTE`: a `;` is reached in the main scanning state (not inside
 *     a quote, an embedded command, a skipped comment or a backslash escape);
 *   - `END_COMMAND`: a `}` is reached in the main scanning state;
 *   - `END_STRING`: the input is exhausted.
 *
 * The returned `position` is the index just past the consumed text measured
 * in the original `str` (the length of any stripped leading whitespace is
 * added back). Token `position`s are measured in the string after leading
 * whitespace has been stripped.
 *
 * Characters that cannot be lexed are emitted as `INVALID` tokens rather than
 * throwing.
 *
 * @param {string} str - an equation string
 * @param {Environment} env - passed through to `processCommandInAttribute` when a `{` is met
 * @param {object} options
 * @param {boolean=} options.singleToken - if true, only get the first token (used in formula assignment parsing)
 * @param {boolean=} options.perserveStartWS - if true, leading whitespace is not stripped before lexing
 * @param {(boolean)=} options.lexComment - if true, `#` starts a comment even outside `block` ... `endblock`
 * @param {(boolean)=} options.skipCleanTokens - if true, consecutive whitespace tokens are not merged in the result
 *
 * @return {LexResult}
 */
export function lex(str, env, options = { singleToken: false, perserveStartWS: false, lexComment: false, skipCleanTokens: false }) {
  // remove whitespace from the equation
  let wsLength = 0;
  if (!options.perserveStartWS) {
    let wsMatch = str.match(START_WS_REGEX);
    wsLength = wsMatch ? wsMatch[0].length : 0;
    str = str.trimStart();
  }

  /** @type {TokenType[]} */
  let tokens = [];
  // While inside a "string" or `long identifier`: the opening quote character
  // followed by the decoded contents read so far. null otherwise.
  let runningQuote = null;
  // Index of the opening quote of the string / long identifier being read.
  let quoteStart = null;
  let position = 0;
  // true between a BLOCK and an ENDBLOCK keyword; enables `#` comments.
  let isInBlock = false;

  /**
   * Appends a token. Its position is derived as the current `position` minus
   * the length of its source, so `position` must already have been advanced
   * past the token when this is called.
   *
   * @param {Omit<TokenType, 'position'>} t
   */
  function emitToken(t) {
    tokens.push({
      ...t,
      position: position - t.source.length,
    });
  }

  /**
   * Collapses consecutive whitespace tokens (they become adjacent when a
   * comment between them is skipped). The merged token is a line-break token
   * (`WS_B`) if any of its parts was.
   *
   * Returns a new list, or the original list when `options.skipCleanTokens`
   * is set. Note that the first token object of each merged run is mutated.
   * @returns {TokenType[]}
   */
  function cleanTokens() {
    if (options.skipCleanTokens) {
      return tokens;
    }
    /** @type {TokenType[]} */
    let newTokens = [];
    for (let i = 0; i < tokens.length; i++) {
      let lastToken = newTokens[newTokens.length - 1];
      let token = tokens[i];
      if (token.type.startsWith('WS') && lastToken && lastToken.type.startsWith('WS')) {
        lastToken.source += token.source;
        if (token.type === 'WS_B') {
          lastToken.type = 'WS_B';
        }
      } else {
        newTokens.push(token);
      }
    }
    return newTokens;
  }

  /**
   * Classifies a character as whitespace.
   *
   * @param {string} char
   * @return {{ isBreak: boolean} | null} null when `char` is not whitespace;
   *   otherwise `isBreak` tells whether it is a line-break character
   */
  function isWhitespace(char) {
    // \s from MDN: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions/Character_classes
    // identified the line break characters as defined on Wikipedia
    // https://en.wikipedia.org/wiki/Whitespace_character

    if (char === '\u0020' ||
      char === '\t' ||
      char === '\u00a0' ||
      char === '\u1680' ||
      char === '\u202f' ||
      char === '\u205f' ||
      char === '\u3000' ||
      char === '\ufeff' ||
      (char >= '\u2000' && char <= '\u200a')) {
      return { isBreak: false };
    }

    if (char === '\f' ||
      char === '\n' ||
      char === '\r' ||
      char === '\v' ||
      char === '\u2028' ||
      char === '\u2029') {
      return { isBreak: true };
    }

    return null;
  }

  while (position < str.length) {
    // Not cleaned: at most one token has been emitted at this point.
    if (tokens.length && options.singleToken) {
      return {
        termination: 'SINGLE_TOKEN',
        position: position + wsLength,
        tokens
      };
    }

    if (runningQuote) {
      // Inside a "string" or `long identifier`: consume one character (or one
      // escape sequence) per iteration.
      let char = str[position];
      position++;
      if (char === '\\') {
        if (position === str.length) {
          // Dangling backslash at the end of input; the quote stays open and
          // is reported as INVALID after the loop.
          runningQuote += char;
        } else {
          let char = str[position];
          position++;
          if (char === 'n') {
            runningQuote += '\n';
          } else if (char === 't') {
            runningQuote += '\t';
          } else if (char === 'r') {
            runningQuote += '\r';
          } else  {
            // Escapes, \n, \t, \\ and \r, otherwise pass the '\' through
            // Will be parsed as a JSON string later
            if ('\\' === char) {
              runningQuote += char;
            } else if (runningQuote[0] === char) {
              // When in a string, escape ("). When in a long identifier, escape (`).
              runningQuote += char;
            } else {
              runningQuote += '\\' + char;
            }
          }
        }
      } else {
        if (char === runningQuote[0]) {
          // Closing quote: drop the leading quote character to get the value.
          let s = runningQuote.slice(1);
          if (runningQuote[0] === '"') {
            emitToken({
              type: 'STRING',
              string: s,
              source: str.slice(quoteStart, position)
            });
          } else {
            // it's a `long identifier`
            if (s.length && s.trim()) {
              emitToken({
                type: 'IDENTIFIER',
                identifier: s,
                source: str.slice(quoteStart, position)
              });
            } else {
              // identifier can't have no length or be blank
              emitToken({
                type: 'INVALID',
                source: str.slice(quoteStart, position)
              });
            }
          }
          runningQuote = null;
          quoteStart = null;
          continue;
        } else {
          runningQuote += char;
        }
      }
    } else {
      let currentChar = str[position];

      if (currentChar === '\\') {
        // not valid syntactically in equations
        // needed for escaping ';' in basic list mode
        if (position === str.length - 1) {
          position++;
          emitToken({
            type: 'INVALID',
            source: '\\'
          });
        } else {
          // Consume the backslash together with the character it escapes.
          position++;
          position++;
          emitToken({
            type: 'INVALID',
            source: '\\' + (str[position - 1] || '')
          });
        }
        continue;
      }

      // Whitespace: consume the whole run as one token; it is a WS_B token if
      // the run contains at least one line-break character.
      let ws;
      if (!!(ws = isWhitespace(currentChar))) {
        const startPosition = position;
        let nextChar = currentChar;
        let newLineProcessed;
        do {
          if (!newLineProcessed) {
            newLineProcessed = ws.isBreak;
          }
          position++;
          nextChar = str[position];
        } while (!!(ws = isWhitespace(nextChar)));
        const token = str.substring(startPosition, position);
        emitToken({
          type: newLineProcessed ? 'WS_B' : 'WS',
          source: token,
        });
        continue;
      }

      // Numbers: digits with at most one period; a second period ends the number.
      if ((currentChar >= '0' && currentChar <= '9') || currentChar === '.') {
        const startPosition = position;
        let nextChar;
        let periodProcessed = currentChar === '.';
        do {
          position++;
          nextChar = str[position];
          if (nextChar === '.') {
            if (periodProcessed) {
              break;
            }
            periodProcessed = true;
          }
        } while ((nextChar && ((nextChar >= '0' && nextChar <= '9') || nextChar === '.')));
        if (periodProcessed && position - startPosition === 1) {
          // A lone '.' is not a number: rewind and fall through so the trie
          // below lexes it as a PERIOD token.
          position = startPosition;
        } else {
          const token = str.substring(startPosition, position);
          emitToken({
            type: 'NUMBER',
            number: parseFloat(token),
            source: token,
          });
          continue;
        }
      }

      if (currentChar === ';') {
        position++;
        return {
          termination: 'END_ATTRIBUTE',
          position: position + wsLength,
          tokens: cleanTokens()
        };
      }

      if (currentChar === '#' && (isInBlock || options.lexComment)) {
        // comment, removing everything up to the newline
        // (the newline itself is left in place and lexed as whitespace)
        let index = str.indexOf('\n', position) - position;
        if (index > -1) {
          position += index;

          continue;
        } else {
          // No newline after the '#': it is not treated as a comment and is
          // emitted as INVALID; lexing resumes right after it.
          position++;
          emitToken({
            type: 'INVALID',
            source: currentChar
          });
          continue;
        }
      }

      if (currentChar === '}') {
        position++;
        return {
          termination: 'END_COMMAND',
          position:  position + wsLength,
          tokens: cleanTokens()
        };
      }

      if (currentChar === '{') {
        let loc = str.slice(position);
        let command = processCommandInAttribute(loc, env);
        if (command) {
          position += command.length;
          emitToken({
            type: 'COMMAND',
            command,
            source: command
          });
          continue;
        } else {
          // we want the error to show up later if we have something that looks like a command
          // but is actually invalid
          let command = parseBrokenCommand(loc);
          if (command) {
            position += command.length;
            emitToken({
              type: 'COMMAND',
              command,
              source: command
            });
            continue;
          }
        }
        // Neither parser produced a command: fall through; the '{' ends up as
        // an INVALID token at the bottom of the loop.
      }

      // Start strings and long identifiers.
      // They both support the same escaping logic
      if (currentChar === '"' || currentChar === '`') {
        runningQuote = currentChar;
        quoteStart = position;
        position++;
        continue;
      }

      // Operators and keywords: walk the trie as far as the input allows and
      // remember the last position at which a complete token ended (longest match).
      if (LEX_TRIE.trie[currentChar]) {
        const startPosition = position;
        let nextNode = LEX_TRIE.trie[currentChar];
        let lastLeafPos;
        let kind;
        do {
          position++;
          if (nextNode.leaf) {
            lastLeafPos = position;
            kind = nextNode.kind;
          }
          nextNode = nextNode.trie[str[position]];
        } while (nextNode);

        let process = true;
        if (lastLeafPos === undefined) {
          // no token found in trie, move back and stop processing
          position = startPosition;
          process = false;
        } else if (kind !== 'basic') {
          // for keyword tokens, next char should not be alphanumeric/underscore
          // (otherwise the text is an identifier that merely starts with a keyword)
          const nextChar = str[lastLeafPos];
          if (nextChar && (
            nextChar === '_' ||
            (nextChar >= '0' && nextChar <= '9') ||
            (nextChar >= 'a' && nextChar <= 'z') ||
            (nextChar >= 'A' && nextChar <= 'Z'))
          ) {
            position = startPosition;
            process = false;
          }
        }

        if (process) {
          // The walk may have overshot the last complete token; step back to it.
          position = lastLeafPos;
          const source = str.substring(startPosition, position);
          const token = source.toUpperCase();
          if (kind === 'basic') {
            emitToken({
              type: BASIC_TOKENS[token],
              source: token
            });
          } else if (kind === 'keyword') {
            if (token === 'YES' || token === 'NO') {
              emitToken({
                type: 'BOOLEAN',
                boolean: token === 'YES',
                source,
              });
            } else {
              if (token === 'BLOCK') {
                isInBlock = true;
              } else if (token === 'ENDBLOCK') {
                isInBlock = false;
              }
              emitToken({
                type: token,
                isKeyword: true,
                source,
              });
            }
          }
          continue;
        }
      }

      // Plain identifiers: an ASCII letter followed by letters, digits or underscores.
      if ((currentChar >= 'a' && currentChar <= 'z') || (currentChar >= 'A' && currentChar <= 'Z')) {
        const startPosition = position;
        let nextChar;
        do {
          position++;
          nextChar = str[position];
        } while (nextChar === '_' ||
          (nextChar >= 'a' && nextChar <= 'z') ||
          (nextChar >= 'A' && nextChar <= 'Z') ||
          (nextChar >= '0' && nextChar <= '9')
        );
        const token = str.substring(startPosition, position);
        emitToken({
          type: 'IDENTIFIER',
          identifier: token,
          source: token
        });
        continue;
      }

      // we want to be able to handle errors with invalid characters in the equation
      position++;
      emitToken({
        type: 'INVALID',
        source: currentChar,
      });
    }
  }

  // Input ended inside an unterminated string / long identifier.
  if (runningQuote) {
    emitToken({
      type: 'INVALID',
      source: str.slice(quoteStart, position)
    });
  }


  return {
    termination: 'END_STRING',
    position: position + wsLength,
    tokens: cleanTokens()
  };
}

// Matches, at the very start of a string, a `$` followed by one or more digits (e.g. `$1`).
export const SQL_PLACEHOLDER_REGEX = /^\$\d+/;
/* END_REMOVE_IN_PYTHON */


/**
 * Additional reserved words recognized by the SQL lexer. They are inserted
 * into its trie with kind 'sql' and are emitted with `isSQLKeyword: true`.
 */
export const SQL_KEYWORD_TOKENS = [
  'SELECT',
  'ORDER',
  'BY',
  'AS',
  'WHERE',
  'LIMIT',
  'GROUP',
  'FROM',
  'ASC',
  'DESC',
  'HAVING',
  'DELETE',
  'SET',
  'UPDATE',
  'INSERT',
  'INTO',
  'VALUES'
];

// Trie used by lexSQL(): the operator / punctuation tokens (kind 'basic'),
// the reserved words (kind 'keyword') and the SQL reserved words (kind 'sql').
const LEX_SQL_TRIE = tokensToTrie([
  {
    tokens: Object.keys(BASIC_TOKENS),
    kind: 'basic',
  },
  {
    tokens: KEYWORD_TOKENS,
    kind: 'keyword',
  },/* BEGIN_REMOVE_FOR_EQUATION */
  {
    tokens: SQL_KEYWORD_TOKENS,
    kind: 'sql',
  },/* END_REMOVE_FOR_EQUATION */
]);


/**
 * Tokenizes an equation string using the SQL trie, so the words in
 * `SQL_KEYWORD_TOKENS` are recognized as keywords in addition to the regular
 * operators and keywords.
 *
 * Scanning stops, and `termination` reports why, when:
 *   - `SINGLE_TOKEN`: `options.singleToken` is set, a token has been emitted
 *     and input remains;
 *   - `END_ATTRIBUTE`: a `;` is reached outside a quote or backslash escape;
 *   - `END_STRING`: the input is exhausted.
 *
 * The returned `position` is the index just past the consumed text measured
 * in the original `str` (the length of any stripped leading whitespace is
 * added back). Token `position`s are measured in the string after leading
 * whitespace has been stripped.
 *
 * This function does not handle `{` / `}` or `#` specially, emits every
 * whitespace run as a plain `WS` token, and returns the tokens without
 * merging adjacent whitespace tokens. Characters that cannot be lexed are
 * emitted as `INVALID` tokens.
 *
 * @param {string} str - an equation string
 * @param {{singleToken?: boolean, perserveStartWS?: boolean }=} options
 *   - singleToken: if true, only get the first token (used in formula assignment parsing)
 *   - perserveStartWS: if true, leading whitespace is not stripped before lexing
 * @return {LexResult}
 */
export function lexSQL(str, options = { singleToken: false, perserveStartWS: false }) {
  // remove whitespace from the equation
  let wsLength = 0;
  if (!options.perserveStartWS) {
    let wsMatch = str.match(START_WS_REGEX);
    wsLength = wsMatch ? wsMatch[0].length : 0;
    str = str.replace(START_WS_REGEX, '');
  }

  /** @type {TokenType[]} */
  let tokens = [];
  // While inside a "string" or `long identifier`: the opening quote character
  // followed by the decoded contents read so far. null otherwise.
  let runningQuote = null;
  // Index of the opening quote of the string / long identifier being read.
  let quoteStart = null;
  let position = 0;

  /**
   * Appends a token. Its position is derived as the current `position` minus
   * the length of its source, so `position` must already have been advanced
   * past the token when this is called. The passed object itself is mutated
   * and stored.
   *
   * @param {Omit<TokenType, 'position'>} t
   */
  function emitToken(t) {
    // @ts-ignore
    t.position = position - t.source.length;
    tokens.push(/** @type {TokenType} */ (t));
  }

  // IMPORTANT NOTE: this stepper is required for js2py
  // eslint-disable-next-line
  stepper:
  while (position < str.length) {
    if (tokens.length && options.singleToken) {
      return {
        termination: 'SINGLE_TOKEN',
        position: position + wsLength,
        tokens
      };
    }

    if (runningQuote) {
      // Inside a "string" or `long identifier`: consume one character (or one
      // escape sequence) per iteration.
      let char = str[position];
      position++;
      if (char === '\\') {
        if (position === str.length) {
          // Dangling backslash at the end of input; the quote stays open and
          // is reported as INVALID after the loop.
          runningQuote += char;
        } else {
          let char = str[position];
          position++;
          if (char === 'n') {
            runningQuote += '\n';
          } else if (char === 't') {
            runningQuote += '\t';
          } else if (char === 'r') {
            runningQuote += '\r';
          } else  {
            // Escapes, \n, \t, \\ and \r, otherwise pass the '\' through
            // Will be parsed as a JSON string later
            if ('\\' === char) {
              runningQuote += char;
            } else if (runningQuote[0] === char) {
              // When in a string, escape ("). When in a long identifier, escape (`).
              runningQuote += char;
            } else {
              runningQuote += '\\' + char;
            }
          }
        }
      } else {
        if (char === runningQuote[0]) {
          // Closing quote: drop the leading quote character to get the value.
          let s = runningQuote.slice(1);
          if (runningQuote[0] === '"') {
            emitToken({
              type: 'STRING',
              string: s,
              source: str.slice(quoteStart, position)
            });
          } else {
            // it's a `long identifier`
            if (s.length && s.trim()) {
              emitToken({
                type: 'IDENTIFIER',
                identifier: s,
                source: str.slice(quoteStart, position)
              });
            } else {
              // identifier can't have no length or be blank
              emitToken({
                type: 'INVALID',
                source: str.slice(quoteStart, position)
              });
            }
          }
          runningQuote = null;
          quoteStart = null;
          continue;
        } else {
          runningQuote += char;
        }
      }
    } else {
      let currentChar = str[position];

      if (currentChar === '\\') {
        // not valid syntactically in equations
        // needed for escaping ';' in basic list mode
        if (position === str.length - 1) {
          position++;
          emitToken({
            type: 'INVALID',
            source: '\\'
          });
        } else {
          // Consume the backslash together with the character it escapes.
          position++;
          position++;
          emitToken({
            type: 'INVALID',
            source: '\\' + (str[position - 1] || '')
          });
        }
        continue;
      }

      // we're avoiding calling any top-level/inner util functions for those checks to keep it performing well
      // after converted to python
      // Only space, newline, carriage return and tab count as whitespace here;
      // the whole run becomes a single WS token.
      if (currentChar === ' ' || currentChar === '\n' || currentChar === '\r' || currentChar === '\t') {
        const startPosition = position;
        let nextChar;
        do {
          position++;
          nextChar = str[position];
        } while (nextChar === ' ' || nextChar === '\n' || nextChar === '\r' || nextChar === '\t');
        const token = str.substring(startPosition, position);
        emitToken({
          type: 'WS',
          source: token,
        });
        continue;
      }

      // Numbers: digits with at most one period; a second period ends the number.
      if ((currentChar >= '0' && currentChar <= '9') || currentChar === '.') {
        const startPosition = position;
        let nextChar;
        let periodProcessed = currentChar === '.';
        do {
          position++;
          nextChar = str[position];
          if (nextChar === '.') {
            if (periodProcessed) {
              break;
            }
            periodProcessed = true;
          }
        } while ((nextChar && ((nextChar >= '0' && nextChar <= '9') || nextChar === '.')));
        if (periodProcessed && position - startPosition === 1) {
          // A lone '.' is not a number: rewind and fall through so the trie
          // below lexes it as a PERIOD token.
          position = startPosition;
        } else {
          const token = str.substring(startPosition, position);
          emitToken({
            type: 'NUMBER',
            number: parseFloat(token),
            source: token,
          });
          continue;
        }
      }

      if (currentChar === ';') {
        position++;
        return {
          termination: 'END_ATTRIBUTE',
          position: position + wsLength,
          tokens
        };
      }

      // Start strings and long identifiers.
      // They both support the same escaping logic
      if (currentChar === '"' || currentChar === '`') {
        runningQuote = currentChar;
        quoteStart = position;
        position++;
        continue;
      }

      // Operators and keywords: walk the trie as far as the input allows and
      // remember the last position at which a complete token ended (longest match).
      if (LEX_SQL_TRIE.trie[currentChar]) {
        const startPosition = position;
        let nextNode = LEX_SQL_TRIE.trie[currentChar];
        let lastLeafPos;
        let kind;
        do {
          position++;
          if (nextNode.leaf) {
            lastLeafPos = position;
            kind = nextNode.kind;
          }
          nextNode = nextNode.trie[str[position]];
        } while (nextNode);

        let process = true;
        if (lastLeafPos === undefined) {
          // no token found in trie, move back and stop processing
          position = startPosition;
          process = false;
        } else if (kind !== 'basic') {
          // for keyword/sql tokens, next char should not be alphanumeric/underscore
          // (otherwise the text is an identifier that merely starts with a keyword)
          const nextChar = str[lastLeafPos];
          if (nextChar && (
            nextChar === '_' ||
            (nextChar >= '0' && nextChar <= '9') ||
            (nextChar >= 'a' && nextChar <= 'z') ||
            (nextChar >= 'A' && nextChar <= 'Z'))
          ) {
            position = startPosition;
            process = false;
          }
        }

        if (process) {
          // The walk may have overshot the last complete token; step back to it.
          position = lastLeafPos;
          const source = str.substring(startPosition, position);
          const token = source.toUpperCase();
          if (kind === 'basic') {
            emitToken({
              type: BASIC_TOKENS[token],
              source: token
            });
          } else if (kind === 'keyword') {
            if (token === 'YES' || token === 'NO') {
              emitToken({
                type: 'BOOLEAN',
                boolean: token === 'YES',
                source,
              });
            } else {
              emitToken({
                type: token,
                isKeyword: true,
                isSQLKeyword: false,
                source,
              });
            }
          }/* BEGIN_REMOVE_FOR_EQUATION */ else if (kind === 'sql') {
            emitToken({
              type: token,
              isKeyword: true,
              isSQLKeyword: true,
              source,
            });
          }/* END_REMOVE_FOR_EQUATION */
          continue;
        }
      }

      // Plain identifiers: an ASCII letter followed by letters, digits or underscores.
      if ((currentChar >= 'a' && currentChar <= 'z') || (currentChar >= 'A' && currentChar <= 'Z')) {
        const startPosition = position;
        let nextChar;
        do {
          position++;
          nextChar = str[position];
        } while (nextChar === '_' ||
          (nextChar >= 'a' && nextChar <= 'z') ||
          (nextChar >= 'A' && nextChar <= 'Z') ||
          (nextChar >= '0' && nextChar <= '9')
        );
        const token = str.substring(startPosition, position);
        emitToken({
          type: 'IDENTIFIER',
          identifier: token,
          source: token
        });
        continue;
      }

      /* BEGIN_REMOVE_FOR_EQUATION */
      // `$` followed by at least one digit (e.g. `$1`) is emitted as an
      // IDENTIFIER whose name includes the `$`; a bare `$` rewinds and ends up
      // as INVALID below.
      if (currentChar === '$') {
        const startPosition = position;
        let nextChar;
        do {
          position++;
          nextChar = str[position];
        } while (nextChar >= '0' && nextChar <= '9');
        if (position - startPosition > 1) {
          const id = str.substring(startPosition, position);
          emitToken({
            type: 'IDENTIFIER',
            identifier: id,
            source: id,
          });
          continue;
        } else {
          position = startPosition;
        }
      }
      /* END_REMOVE_FOR_EQUATION */

      // we want to be able to handle errors with invalid characters in the equation
      position++;
      emitToken({
        type: 'INVALID',
        source: currentChar
      });
    }
  }

  // Input ended inside an unterminated string / long identifier.
  if (runningQuote) {
    emitToken({
      type: 'INVALID',
      source: str.slice(quoteStart, position)
    });
  }

  return {
    termination: 'END_STRING',
    position: position + wsLength,
    tokens
  };
}




/* BEGIN_REMOVE_IN_PYTHON */
/**
 * Lexes an equation string that may contain SQL keywords and embedded
 * `{...}` commands into a flat list of tokens.
 *
 * The string is scanned left to right in a single pass. Depending on the
 * character at the current position the lexer emits WS, NUMBER, STRING,
 * IDENTIFIER, COMMAND, BOOLEAN, operator/keyword tokens (looked up in
 * LEX_SQL_TRIE) or INVALID tokens. Whitespace is emitted as WS tokens rather
 * than being dropped.
 *
 * Lexing stops early when:
 *  - an unquoted `;` is found (termination 'END_ATTRIBUTE'),
 *  - an unquoted `}` is found (termination 'END_COMMAND'),
 *  - `options.singleToken` is set and at least one token has been emitted
 *    while input remains (termination 'SINGLE_TOKEN').
 * Otherwise the whole string is consumed (termination 'END_STRING'). Note that
 * with `singleToken` set, a first token that reaches the end of the string
 * still terminates with 'END_STRING'.
 *
 * Positions: each token's `position` is an offset into the string *after*
 * leading whitespace has been stripped, whereas the returned `position` has
 * the stripped whitespace length added back.
 *
 * @param {string} str - an equation string
 * @param {Environment} env - passed through to processCommandInAttribute when a `{` is encountered
 * @param {{singleToken?: boolean, perserveStartWS?: boolean }=} options - singleToken - if true, only get the first token (used in formula assignment parsing);
 *   perserveStartWS - if true, leading whitespace is not stripped and is lexed like any other whitespace
 * @return {LexResult} the termination reason, the position reached and the tokens emitted so far
 */
export function lexSQLWithCommands(str, env, options = { singleToken: false, perserveStartWS: false }) {
  // Strip leading whitespace from the equation (unless asked to preserve it)
  // and remember how much was removed so it can be added back to the
  // returned position.
  let wsLength = 0;
  if (!options.perserveStartWS) {

    let wsMatch = str.match(START_WS_REGEX);
    wsLength = wsMatch ? wsMatch[0].length : 0;
    str = str.replace(START_WS_REGEX, '');
  }

  /** @type {TokenType[]} */
  let tokens = [];
  // While inside a string or long identifier: the opening quote character
  // followed by the decoded content collected so far. null when not quoting.
  let runningQuote = null;
  // Index in `str` of the opening quote of the current string/long identifier.
  let quoteStart = null;
  // Index in `str` of the next character to be consumed.
  let position = 0;

  /**
   * Appends a token to `tokens`, stamping it with its start position.
   * Must be called after `position` has been advanced past the token, as the
   * start is derived from `position - source.length`.
   *
   * @param {Omit<TokenType, 'position'>} t
   */
  function emitToken(t) {
    // @ts-ignore
    t.position = position - t.source.length;
    tokens.push(/** @type {TokenType} */ (t));
  }

  while (position < str.length) {
    // In single-token mode, stop as soon as one token has been emitted.
    if (tokens.length && options.singleToken) {
      return {
        termination: 'SINGLE_TOKEN',
        position: position + wsLength,
        tokens
      };
    }

    if (runningQuote) {
      // Inside a "string" or a `long identifier`: consume one character
      // (or one escape sequence) per loop iteration.
      let char = str[position];
      position++;
      if (char === '\\') {
        if (position === str.length) {
          // trailing backslash at the very end of the input: keep it literally
          runningQuote += char;
        } else {
          // consume the escaped character as well
          let char = str[position];
          position++;
          if (char === 'n') {
            runningQuote += '\n';
          } else if (char === 't') {
            runningQuote += '\t';
          } else if (char === 'r') {
            runningQuote += '\r';
          } else  {
            // Escapes, \n, \t, \\ and \r, otherwise pass the '\' through
            // Will be parsed as a JSON string later
            if ('\\' === char) {
              runningQuote += char;
            } else if (runningQuote[0] === char) {
              // When in a string, escape ("). When in a long identifier, escape (`).
              runningQuote += char;
            } else {
              // unknown escape: keep both the backslash and the character
              runningQuote += '\\' + char;
            }
          }
        }
      } else {
        if (char === runningQuote[0]) {
          // Unescaped closing quote: emit the token. `s` is the decoded
          // content without the opening quote; `source` is the raw text
          // including both quotes.
          let s = runningQuote.slice(1);
          if (runningQuote[0] === '"') {
            emitToken({
              type: 'STRING',
              string: s,
              source: str.slice(quoteStart, position)
            });
          } else {
            // it's a `long identifier`
            if (s.length && s.trim()) {
              emitToken({
                type: 'IDENTIFIER',
                identifier: s,
                source: str.slice(quoteStart, position)
              });
            } else {
              // identifier can't have no length or be blank
              emitToken({
                type: 'INVALID',
                source: str.slice(quoteStart, position)
              });
            }
          }
          runningQuote = null;
          quoteStart = null;
          continue;
        } else {
          // ordinary character inside the quote
          runningQuote += char;
        }
      }
    } else {
      // Not inside a quote. The branches below are tried in order; a branch
      // that cannot produce a token rewinds `position` and falls through to
      // the next one. The final fallback emits a one-character INVALID token.
      let currentChar = str[position];

      if (currentChar === '\\') {
        // not valid syntactically in equations
        // needed for escaping ';' in basic list mode
        if (position === str.length - 1) {
          // lone backslash at the end of the input
          position++;
          emitToken({
            type: 'INVALID',
            source: '\\'
          });
        } else {
          // backslash plus the character it escapes form a single INVALID
          // token, so an escaped ';' or '}' does not terminate lexing here
          position++;
          position++;
          emitToken({
            type: 'INVALID',
            source: '\\' + (str[position - 1] || '')
          });
        }
        continue;
      }

      // A run of spaces, newlines, carriage returns and tabs becomes one WS token.
      if (currentChar === ' ' || currentChar === '\n' || currentChar === '\r' || currentChar === '\t') {
        const startPosition = position;
        let nextChar;
        do {
          position++;
          nextChar = str[position];
        } while (nextChar === ' ' || nextChar === '\n' || nextChar === '\r' || nextChar === '\t');
        const token = str.substring(startPosition, position);
        emitToken({
          type: 'WS',
          source: token,
        });
        continue;
      }

      // Numbers: digits with at most one period. A leading period counts as
      // that period (e.g. ".5"); a second period ends the number.
      if ((currentChar >= '0' && currentChar <= '9') || currentChar === '.') {
        const startPosition = position;
        let nextChar;
        let periodProcessed = currentChar === '.';
        do {
          position++;
          nextChar = str[position];
          if (nextChar === '.') {
            if (periodProcessed) {
              break;
            }
            periodProcessed = true;
          }
        } while ((nextChar && ((nextChar >= '0' && nextChar <= '9') || nextChar === '.')));
        if (periodProcessed && position - startPosition === 1) {
          // a lone '.' with no digits after it is not a number: rewind and
          // let the branches below handle it
          position = startPosition;
        } else {
          const token = str.substring(startPosition, position);
          emitToken({
            type: 'NUMBER',
            number: parseFloat(token),
            source: token,
          });
          continue;
        }
      }

      // An unquoted ';' ends lexing; it is consumed but no token is emitted for it.
      if (currentChar === ';') {
        position++;
        return {
          termination: 'END_ATTRIBUTE',
          position: position + wsLength,
          tokens
        };
      }

      // An unquoted '}' ends lexing; it is consumed but no token is emitted for it.
      if (currentChar === '}') {
        position++;
        return {
          termination: 'END_COMMAND',
          position:  position + wsLength,
          tokens
        };
      }

      // Embedded command. Both helpers are given the remainder of the string
      // starting at '{'; a truthy result is treated as the command's source
      // text and its length is skipped.
      if (currentChar === '{') {
        let loc = str.slice(position);
        let command = processCommandInAttribute(loc, env);
        if (command) {
          position += command.length;
          emitToken({
            type: 'COMMAND',
            command,
            source: command
          });
          continue;
        } else {
          // we want the error to show up later if we have something that looks like a command
          // but is actually invalid
          let command = parseBrokenCommand(loc);
          if (command) {
            position += command.length;
            emitToken({
              type: 'COMMAND',
              command,
              source: command
            });
            continue;
          }
        }
        // neither helper recognized a command: fall through, '{' will end up
        // in one of the branches below (ultimately INVALID)
      }

      // Start strings and long identifiers.
      // They both support the same escaping logic
      if (currentChar === '"' || currentChar === '`') {
        runningQuote = currentChar;
        quoteStart = position;
        position++;
        continue;
      }

      // Operators and keywords: walk the trie one character at a time and
      // remember the end of the last leaf seen, i.e. the longest match.
      if (LEX_SQL_TRIE.trie[currentChar]) {
        const startPosition = position;
        let nextNode = LEX_SQL_TRIE.trie[currentChar];
        let lastLeafPos;
        let kind;
        do {
          position++;
          if (nextNode.leaf) {
            lastLeafPos = position;
            kind = nextNode.kind;
          }
          nextNode = nextNode.trie[str[position]];
        } while (nextNode);

        let process = true;
        if (lastLeafPos === undefined) {
          // no token found in trie, move back and stop processing
          position = startPosition;
          process = false;
        } else if (kind !== 'basic') {
          // for keyword/sql tokens, next char should not be alphanumeric/underscore
          // (otherwise the match is just the prefix of a longer identifier)
          const nextChar = str[lastLeafPos];
          if (nextChar && (
            nextChar === '_' ||
            (nextChar >= '0' && nextChar <= '9') ||
            (nextChar >= 'a' && nextChar <= 'z') ||
            (nextChar >= 'A' && nextChar <= 'Z'))
          ) {
            position = startPosition;
            process = false;
          }
        }

        if (process) {
          // The walk may have gone past the last leaf; cut back to the match.
          position = lastLeafPos;
          const source = str.substring(startPosition, position);
          // Upper-cased so the token type does not depend on the casing used
          // in the source text.
          const token = source.toUpperCase();
          if (kind === 'basic') {
            emitToken({
              type: BASIC_TOKENS[token],
              source: token
            });
          } else if (kind === 'keyword') {
            if (token === 'YES' || token === 'NO') {
              // YES/NO are boolean literals rather than keyword tokens
              emitToken({
                type: 'BOOLEAN',
                boolean: token === 'YES',
                source,
              });
            } else {
              emitToken({
                type: token,
                isKeyword: true,
                isSQLKeyword: false,
                source,
              });
            }
          } else if (kind === 'sql') {
            emitToken({
              type: token,
              isKeyword: true,
              isSQLKeyword: true,
              source,
            });
          }
          continue;
        }
      }

      // Plain identifier: a letter followed by letters, digits or underscores.
      if ((currentChar >= 'a' && currentChar <= 'z') || (currentChar >= 'A' && currentChar <= 'Z')) {
        const startPosition = position;
        let nextChar;
        do {
          position++;
          nextChar = str[position];
        } while (nextChar === '_' ||
          (nextChar >= 'a' && nextChar <= 'z') ||
          (nextChar >= 'A' && nextChar <= 'Z') ||
          (nextChar >= '0' && nextChar <= '9')
        );
        const token = str.substring(startPosition, position);
        emitToken({
          type: 'IDENTIFIER',
          identifier: token,
          source: token
        });
        continue;
      }


      // '@' identifier: '@', then a letter, then letters, digits or
      // underscores. The '@' is part of the emitted identifier.
      if (currentChar === '@') {
        const startPosition = position;
        position++;
        let nextChar = str[position];
        if (nextChar && ((nextChar >= 'a' && nextChar <= 'z') || (nextChar >= 'A' && nextChar <= 'Z'))) {
          do {
            position++;
            nextChar = str[position];
          } while (nextChar === '_' ||
            (nextChar >= 'a' && nextChar <= 'z') ||
            (nextChar >= 'A' && nextChar <= 'Z') ||
            (nextChar >= '0' && nextChar <= '9')
          );
          const id = str.substring(startPosition, position);
          emitToken({
            type: 'IDENTIFIER',
            identifier: id,
            source: id,
          });
          continue;
        } else {
          // '@' not followed by a letter: rewind so it is reported as INVALID below
          position = startPosition;
        }
      }

      // we want to be able to handle errors with invalid characters in the equation
      position++;
      emitToken({
        type: 'INVALID',
        source: currentChar,
      });
    }
  }

  // Input ended inside a string or long identifier: report everything from
  // the opening quote onwards as a single INVALID token.
  if (runningQuote) {
    emitToken({
      type: 'INVALID',
      source: str.slice(quoteStart, position)
    });
  }

  return {
    termination: 'END_STRING',
    position: position + wsLength,
    tokens
  };
}


/* END_REMOVE_IN_PYTHON */