/* eslint no-constant-condition:0 */
var functions = require("./functions"); | |
var environments = require("./environments"); | |
var Lexer = require("./Lexer"); | |
var symbols = require("./symbols"); | |
var utils = require("./utils"); | |
var parseData = require("./parseData"); | |
var ParseError = require("./ParseError"); | |
// Running transcript of the tokens the parser has consumed:
// Parser.prototype.consume appends " " + token text to it, and
// parseImplicitGroup rewrites its tail after \begin{...} / \end{...}.
// It is stored on the global object on purpose (exactly what the previous
// undeclared assignment did) so code outside this module can still read or
// reset it; the explicit form also works in strict mode.
global.global_str = "";
/**
 * This file contains the parser used to parse out a TeX expression from the
 * input. Since TeX isn't context-free, standard parsers don't work particularly
 * well.
 *
 * The strategy of this parser is as such:
 *
 * The main functions (the `.parse...` ones) take a position in the current
 * parse string to parse tokens from. The lexer (found in Lexer.js, stored at
 * this.lexer) also supports pulling out tokens at arbitrary places. When
 * individual tokens are needed at a position, the lexer is called to pull out a
 * token, which is then used.
 *
 * The parser has a property called "mode" indicating the mode that
 * the parser is currently in. Currently it has to be one of "math" or
 * "text", which denotes whether the current environment is a math-y
 * one or a text-y one (e.g. inside \text). Currently, this serves to
 * limit the functions which can be used in text mode.
 *
 * The main functions then return an object which contains the useful data that
 * was parsed at its given point, and a new position at the end of the parsed
 * data. The main functions can call each other and continue the parsing by
 * using the returned position as a new starting point.
 *
 * There are also extra `.handle...` functions, which pull out some reused
 * functionality into self-contained functions.
 *
 * The earlier functions return ParseNodes.
 * The later functions (which are called deeper in the parse) sometimes return
 * ParseFuncOrArgument, which contain a ParseNode as well as some data about
 * whether the parsed object is a function which is missing some arguments, or a
 * standalone object which can be used as an argument to another function.
 */
/**
 * Main Parser class.
 *
 * @param input     the source text; it is handed unchanged to `new Lexer(...)`
 * @param settings  parser settings; other methods of this class read
 *                  `throwOnError` and `errorColor` from it
 */
function Parser(input, settings) {
    // Every token the parser looks at is pulled from this lexer.
    this.lexer = new Lexer(input);
    // Kept as-is (no copy, no defaults) for use during parsing.
    this.settings = settings;
}
// Local alias for the node constructor exported by ./parseData.
var ParseNode = parseData.ParseNode;
/**
 * An initial function (without its arguments), or an argument to a function.
 *
 * @param result      for an argument, the parsed ParseNode; for a function,
 *                    parseSymbol stores the function's name (a string) here
 * @param isFunction  true when `result` names an entry of functions.js
 */
function ParseFuncOrArgument(result, isFunction) {
    this.result = result;
    // Is this a function (i.e. is it something defined in functions.js)?
    this.isFunction = isFunction;
}
/**
 * Checks that the look-ahead token has the expected text, and throws an
 * appropriate error otherwise.
 *
 * @param {string} text      the exact token text that must come next
 * @param {boolean=} consume whether to consume the expected token,
 *                           defaults to true (only an explicit `false`
 *                           leaves the token in place)
 * @throws {ParseError} when the look-ahead token's text differs from `text`
 */
Parser.prototype.expect = function(text, consume) {
    var found = this.nextToken.text;
    if (found !== text) {
        throw new ParseError(
            "Expected '" + text + "', got '" + found + "'",
            this.lexer, this.nextToken.position
        );
    }
    if (consume !== false) {
        this.consume();
    }
};
/**
 * Considers the current look ahead token as consumed,
 * and fetches the one after that as the new look ahead.
 *
 * Side effect: the consumed token's text, preceded by a single space, is
 * appended to the global `global_str` transcript.
 */
Parser.prototype.consume = function() {
    var consumed = this.nextToken;
    this.pos = consumed.position;
    global_str += " " + consumed.text;
    // Lex the next token from the new position, in the current mode.
    this.nextToken = this.lexer.lex(this.pos, this.mode);
};
/**
 * Main parsing function, which parses an entire input.
 *
 * Resets the parser to math mode at position 0, primes the look-ahead token
 * and then delegates to parseInput.
 *
 * @return {?Array.<ParseNode>}
 */
Parser.prototype.parse = function() {
    this.mode = "math";
    this.pos = 0;
    this.nextToken = this.lexer.lex(this.pos, this.mode);
    return this.parseInput();
};
/**
 * Parses an entire input tree.
 *
 * @return the value returned by parseExpression(false)
 * @throws whatever `expect` throws when the token following the expression
 *         is not "EOF"
 */
Parser.prototype.parseInput = function() {
    var expression = this.parseExpression(false);
    // The whole input must have been used up; the EOF token itself is only
    // checked, not consumed.
    this.expect("EOF", false);
    return expression;
};
// Tokens that always terminate an expression.
var endOfExpression = ["}", "\\end", "\\right", "&", "\\\\", "\\cr"];

/**
 * Parses an "expression", which is a list of atoms.
 *
 * @param {boolean} breakOnInfix Should the parsing stop when we hit infix
 *                  nodes? This happens when functions have higher precedence
 *                  than infix nodes in implicit parses.
 *
 * @param {?string} breakOnToken The token that the expression should end with,
 *                  or `null` if something else should end the expression.
 *
 * @return the result of handleInfixNodes applied to the collected atoms
 */
Parser.prototype.parseExpression = function(breakOnInfix, breakOnToken) {
    var body = [];
    // Keep adding atoms to the body until we can't parse any more atoms
    // (we reached a terminator token, the requested break token, or
    // parseAtom produced nothing).
    while (true) {
        // Snapshot of the parser state, needed to rewind on an infix atom.
        var lex = this.nextToken;
        var pos = this.pos;

        if (endOfExpression.indexOf(lex.text) !== -1) {
            break;
        }
        if (breakOnToken && lex.text === breakOnToken) {
            break;
        }

        var atom = this.parseAtom();
        if (!atom) {
            if (!this.settings.throwOnError && lex.text[0] === "\\") {
                // Lenient mode: render the unknown command as an error node
                // and keep going.
                body.push(this.handleUnsupportedCmd());
                continue;
            }
            break;
        }

        if (breakOnInfix && atom.type === "infix") {
            // rewind so we can parse the infix atom again
            this.pos = pos;
            this.nextToken = lex;
            break;
        }
        body.push(atom);
    }
    return this.handleInfixNodes(body);
};
/**
 * Rewrites infix operators such as \over with corresponding commands such
 * as \frac.
 *
 * There can only be one infix operator per group. If there's more than one
 * then the expression is ambiguous. This can be resolved by adding {}.
 *
 * @param {Array} body the atoms of one group
 * @returns {Array} `body` itself when it contains no infix node; otherwise a
 *          one-element array holding the node built by the replacement
 *          function (taken from the infix node's `value.replaceWith`)
 * @throws {ParseError} when `body` contains more than one infix node
 */
Parser.prototype.handleInfixNodes = function(body) {
    var overIndex = -1;
    var funcName;

    for (var i = 0; i < body.length; i++) {
        var node = body[i];
        if (node.type === "infix") {
            if (overIndex !== -1) {
                throw new ParseError("only one infix operator per group",
                    this.lexer, -1);
            }
            overIndex = i;
            funcName = node.value.replaceWith;
        }
    }

    if (overIndex === -1) {
        return body;
    }

    var mode = this.mode;
    // A side that already is a single ordgroup is used directly; anything
    // else (including an empty side) is wrapped in a fresh ordgroup.
    var toGroup = function(nodes) {
        if (nodes.length === 1 && nodes[0].type === "ordgroup") {
            return nodes[0];
        }
        return new ParseNode("ordgroup", nodes, mode);
    };

    var numerNode = toGroup(body.slice(0, overIndex));
    var denomNode = toGroup(body.slice(overIndex + 1));

    var value = this.callFunction(funcName, [numerNode, denomNode], null);
    return [new ParseNode(value.type, value, this.mode)];
};
// The greediness of a superscript or subscript
var SUPSUB_GREEDINESS = 1;

/**
 * Handle a subscript or superscript with nice errors.
 *
 * Consumes the `^` / `_` look-ahead token and parses the group after it.
 *
 * @param {string} name "superscript" or "subscript"; only used in the error
 *                      message
 * @return the parsed script; an error node from handleUnsupportedCmd when the
 *         group is missing, settings.throwOnError is falsy and the next token
 *         starts with a backslash; otherwise `undefined` when the group is
 *         missing
 * @throws {ParseError} when the script is a function whose greediness does
 *         not exceed SUPSUB_GREEDINESS
 */
Parser.prototype.handleSupSubscript = function(name) {
    var symPos = this.pos;
    this.consume();
    var group = this.parseGroup();

    if (!group) {
        if (!this.settings.throwOnError && this.nextToken.text[0] === "\\") {
            return this.handleUnsupportedCmd();
        }
        // A missing group is tolerated here: no error is raised and the
        // caller receives `undefined` for this script.
        return undefined;
    }

    if (!group.isFunction) {
        return group.result;
    }

    // ^ and _ have a greediness, so handle interactions with functions'
    // greediness
    var funcGreediness = functions[group.result].greediness;
    if (funcGreediness > SUPSUB_GREEDINESS) {
        return this.parseFunction(group);
    }
    throw new ParseError(
        "Got function '" + group.result + "' with no arguments " +
            "as " + name,
        this.lexer, symPos + 1);
};
/**
 * Converts the textual input of an unsupported command into a text node
 * contained within a color node whose color is determined by errorColor.
 *
 * The look-ahead token is read for its text and then consumed.
 *
 * @return {ParseNode} the "color" node wrapping the "text" node
 */
Parser.prototype.handleUnsupportedCmd = function() {
    var text = this.nextToken.text;

    // One text-mode "textord" node per character of the token text.
    var textordArray = [];
    for (var i = 0; i < text.length; i++) {
        textordArray.push(new ParseNode("textord", text[i], "text"));
    }

    var textNode = new ParseNode(
        "text",
        {
            body: textordArray,
            type: "text",
        },
        this.mode);

    var colorNode = new ParseNode(
        "color",
        {
            color: this.settings.errorColor,
            value: [textNode],
            type: "color",
        },
        this.mode);

    this.consume();
    return colorNode;
};
/**
 * Parses a group with optional super/subscripts.
 *
 * A repeated superscript or subscript is not an error here: a later one
 * simply replaces the earlier one (a run of primes also replaces any
 * superscript seen before it).
 *
 * @return {?ParseNode} the base alone, or a "supsub" node when at least one
 *         truthy script was collected
 * @throws {ParseError} when \limits / \nolimits does not follow an "op" base
 */
Parser.prototype.parseAtom = function() {
    // The body of an atom is an implicit group, so that things like
    // \left(x\right)^2 work correctly.
    var base = this.parseImplicitGroup();

    // In text mode, we don't have superscripts or subscripts
    if (this.mode === "text") {
        return base;
    }

    // Note that base may be empty (i.e. null) at this point.
    var superscript;
    var subscript;
    var scanning = true;
    while (scanning) {
        var tokenText = this.nextToken.text;
        switch (tokenText) {
            case "\\limits":
            case "\\nolimits":
                // We got a limit control
                if (!base || base.type !== "op") {
                    throw new ParseError(
                        "Limit controls must follow a math operator",
                        this.lexer, this.pos);
                }
                base.value.limits = tokenText === "\\limits";
                base.value.alwaysHandleSupSub = true;
                this.consume();
                break;

            case "^":
                superscript = this.handleSupSubscript("superscript");
                break;

            case "_":
                subscript = this.handleSupSubscript("subscript");
                break;

            case "'":
                // We got a prime. Many primes can be grouped together, so
                // the same node is pushed once per consecutive "'".
                var prime = new ParseNode("textord", "\\prime", this.mode);
                var primes = [prime];
                this.consume();
                while (this.nextToken.text === "'") {
                    primes.push(prime);
                    this.consume();
                }
                // Put them into an ordgroup as the superscript
                superscript = new ParseNode("ordgroup", primes, this.mode);
                break;

            default:
                // If it wasn't ^, _, ', or a limit control, stop parsing
                // super/subscripts
                scanning = false;
        }
    }

    if (superscript || subscript) {
        // If we got either a superscript or subscript, create a supsub
        return new ParseNode("supsub", {
            base: base,
            sup: superscript,
            sub: subscript,
        }, this.mode);
    }
    // Otherwise return the original body
    return base;
};
// A list of the size-changing functions, for use in parseImplicitGroup.
// The reported size is "size" + (1-based index in this list), so the order
// of the entries matters.
// NOTE(review): the list also holds font commands and "\\siptstyle", which
// looks like a typo; it is kept unchanged because altering the list would
// change which tokens take the sizing branch.
var sizeFuncs = [
    "\\tiny", "\\scriptsize", "\\footnotesize", "\\small", "\\normalsize",
    "\\large", "\\Large", "\\LARGE", "\\huge", "\\Huge", "\\textrm", "\\rm",
    "\\cal", "\\bf", "\\siptstyle", "\\boldmath", "\\it",
];

// A list of the style-changing functions, for use in parseImplicitGroup
var styleFuncs = [
    "\\displaystyle", "\\textstyle", "\\scriptstyle", "\\scriptscriptstyle",
];

// Replaces the last (2 * name.length + 2) characters of the global token
// transcript `global_str` with `name + "}"`. Presumably this collapses the
// space-separated characters of an environment name (" m a t r i x }") that
// consume() recorded one token at a time -- confirm against the lexer.
function collapseEnvNameInTranscript(name) {
    var keep = global_str.length - (name.length * 2 + 2);
    global_str = global_str.substring(0, keep) + name + "}";
}

/**
 * Parses an implicit group, which is a group that starts right after a given
 * function token and ends right before a higher explicit group ends, or at
 * EOL. It is used for functions that appear to affect the current style,
 * like \Large or \textrm, where instead of keeping a style we just pretend
 * that there is an implicit grouping after it until the end of the group.
 * E.g.
 *   small text {\Large large text} small text again
 * It is also used for \left and \right to get the correct grouping, for
 * \begin ... \end environments, and for the brace forms \matrix{...},
 * \pmatrix{...} and \cases{...}.
 *
 * @return {?ParseNode}
 * @throws {ParseError} for an unknown environment name or a \begin / \end
 *         name mismatch
 */
Parser.prototype.parseImplicitGroup = function() {
    var start = this.parseSymbol();

    if (start == null) {
        // If we didn't get anything we handle, fall back to parseFunction
        return this.parseFunction();
    }

    var func = start.result;
    var body;
    var envName;
    var env;
    var context;
    var result;

    if (func === "\\left") {
        // Parse the entire left function (including the delimiter)
        var left = this.parseFunction(start);
        // Parse out the implicit body
        body = this.parseExpression(false);
        // The body must be followed by \right, which is then parsed as a
        // function of its own.
        this.expect("\\right", false);
        var right = this.parseFunction();
        return new ParseNode("leftright", {
            body: body,
            left: left.value.value,
            right: right.value.value,
        }, this.mode);
    }

    if (func === "\\begin") {
        // begin...end is similar to left...right
        var begin = this.parseFunction(start);
        envName = begin.value.name;
        collapseEnvNameInTranscript(begin.value.name + "");
        if (!environments.hasOwnProperty(envName)) {
            throw new ParseError(
                "No such environment: " + envName,
                this.lexer, begin.value.namepos);
        }
        // Build the environment object. Arguments and other information will
        // be made available to the begin and end methods using properties.
        env = environments[envName];
        var args = this.parseArguments("\\begin{" + envName + "}", env);
        context = {
            mode: this.mode,
            envName: envName,
            parser: this,
            lexer: this.lexer,
            // parseArguments returns the positions as its last element
            positions: args.pop(),
        };
        result = env.handler(context, args);
        this.expect("\\end", false);
        var end = this.parseFunction();
        // The \end{...} name was just appended to the transcript; collapse
        // it the same way (using the \begin name's length).
        collapseEnvNameInTranscript(begin.value.name + "");
        if (end.value.name !== envName) {
            throw new ParseError(
                "Mismatch: \\begin{" + envName + "} matched " +
                "by \\end{" + end.value.name + "}",
                this.lexer /* , end.value.namepos */);
            // TODO: Add position to the above line and adjust test case,
            // requires #385 to get merged first
        }
        result.position = end.position;
        return result;
    }

    if (func.value === "\\matrix" || func.value === "\\pmatrix" ||
            func.value === "\\cases") {
        // Brace form of an environment: \matrix{ ... }. The environment is
        // looked up under the command name without its backslash.
        envName = func.value.slice(1);
        if (!environments.hasOwnProperty(envName)) {
            throw new ParseError(
                "No such environment: " + envName,
                this.lexer, this.pos);
        }
        env = environments[envName];
        this.expect("{", true);
        context = {
            mode: this.mode,
            envName: envName,
            parser: this,
            lexer: this.lexer,
        };
        // No arguments are parsed for the brace form; the handler receives
        // an empty object in place of the argument list.
        result = env.handler(context, {});
        this.expect("}", true);
        return result;
    }

    if (utils.contains(sizeFuncs, func)) {
        // If we see a sizing function, parse out the implicit body
        body = this.parseExpression(false);
        return new ParseNode("sizing", {
            // Figure out what size to use based on the list of functions above
            original: func,
            size: "size" + (utils.indexOf(sizeFuncs, func) + 1),
            value: body,
        }, this.mode);
    }

    if (utils.contains(styleFuncs, func)) {
        // If we see a styling function, parse out the implicit body
        body = this.parseExpression(true);
        return new ParseNode("styling", {
            // The style name is the function name without the leading
            // backslash and the trailing "style".
            original: func,
            style: func.slice(1, func.length - 5),
            value: body,
        }, this.mode);
    }

    // Defer to parseFunction if it's not a function we handle
    return this.parseFunction(start);
};
/**
 * Parses an entire function, including its base and all of its arguments.
 * The base might either have been parsed already, in which case
 * it is provided as an argument, or it's the next group in the input.
 *
 * Functions are accepted in every mode: no "not allowed in text mode" check
 * is performed here.
 *
 * @param {ParseFuncOrArgument=} baseGroup optional as described above
 * @return {?ParseNode} null when there is no group to parse
 */
Parser.prototype.parseFunction = function(baseGroup) {
    var group = baseGroup || this.parseGroup();
    if (!group) {
        return null;
    }
    if (!group.isFunction) {
        // A plain argument: its result already is the parsed node.
        return group.result;
    }

    var func = group.result;
    var args = this.parseArguments(func, functions[func]);
    // parseArguments appends the list of positions as the last element.
    var positions = args.pop();
    var result = this.callFunction(func, args, positions);
    return new ParseNode(result.type, result, this.mode);
};
/**
 * Call a function handler with a suitable context and arguments.
 *
 * @param {string} name  key of the function in the `functions` table
 * @param {Array} args   the parsed arguments, passed through to the handler
 * @param positions      stored on the context as `positions`
 * @return whatever the handler returns
 */
Parser.prototype.callFunction = function(name, args, positions) {
    var handler = functions[name].handler;
    return handler({
        funcName: name,
        parser: this,
        lexer: this.lexer,
        positions: positions,
    }, args);
};
/**
 * Parses the arguments of a function or environment
 *
 * @param {string} func "\name" or "\begin{name}"
 * @param {{numArgs:number,numOptionalArgs:number|undefined}} funcData
 *        a missing `numOptionalArgs` is treated as 0
 * @return the array of arguments, with the list of positions as last element
 * @throws {ParseError} when a required argument is missing and cannot be
 *         replaced by an unsupported-command node
 */
Parser.prototype.parseArguments = function(func, funcData) {
    // `numArgs + undefined` would be NaN and silently skip every argument,
    // so default the optional count explicitly.
    var numOptionalArgs = funcData.numOptionalArgs || 0;
    var totalArgs = funcData.numArgs + numOptionalArgs;
    if (totalArgs === 0) {
        return [[this.pos]];
    }

    var baseGreediness = funcData.greediness;
    var positions = [this.pos];
    var args = [];
    // NOTE(review): argNode is deliberately NOT reset per iteration, which
    // mirrors the original function-scoped `var`. When an argument is a
    // function that is not greedier than `func`, no error is raised and
    // argNode keeps its previous value (undefined for the first argument).
    // This looks unintended, but callers may depend on it -- confirm before
    // changing.
    var argNode;

    for (var i = 0; i < totalArgs; i++) {
        var argType = funcData.argTypes && funcData.argTypes[i];
        var arg;

        if (i < numOptionalArgs) {
            // Optional arguments come first and may be absent.
            arg = argType ?
                this.parseSpecialGroup(argType, true) :
                this.parseOptionalGroup();
            if (!arg) {
                args.push(null);
                positions.push(this.pos);
                continue;
            }
        } else {
            arg = argType ?
                this.parseSpecialGroup(argType) :
                this.parseGroup();
            if (!arg) {
                if (!this.settings.throwOnError &&
                        this.nextToken.text[0] === "\\") {
                    // Lenient mode: stand in an error node for the unknown
                    // command.
                    arg = new ParseFuncOrArgument(
                        this.handleUnsupportedCmd(),
                        false);
                } else {
                    throw new ParseError(
                        "Expected group after '" + func + "'",
                        this.lexer, this.pos);
                }
            }
        }

        if (!arg.isFunction) {
            argNode = arg.result;
        } else if (functions[arg.result].greediness > baseGreediness) {
            // A greedier function may appear as an argument without braces.
            argNode = this.parseFunction(arg);
        }

        args.push(argNode);
        positions.push(this.pos);
    }

    args.push(positions);
    return args;
};
/**
 * Parses a group when the mode is changing.
 *
 * @param {string} innerMode the mode for the group: "color", "size", "text",
 *                 "math", or "original" (meaning: keep the current mode)
 * @param {boolean=} optional when truthy the group is delimited by [ ]
 *                 instead of { } and may be absent
 * @return {?ParseFuncOrArgument} null for an absent optional group
 */
Parser.prototype.parseSpecialGroup = function(innerMode, optional) {
    var outerMode = this.mode;

    // Handle `original` argTypes
    if (innerMode === "original") {
        innerMode = outerMode;
    }

    if (innerMode === "color" || innerMode === "size") {
        // color and size modes are special because they should have braces
        // and should only lex a single symbol inside
        if (optional && this.nextToken.text !== "[") {
            // optional arguments should return null if they don't exist
            return null;
        }
        // The call to expect will lex the token after the opening delimiter
        // in inner mode
        this.mode = innerMode;
        this.expect(optional ? "[" : "{");
        var inner = this.nextToken;
        this.mode = outerMode;
        // A color is carried by the token text, a size by the token data.
        var data = innerMode === "color" ? inner.text : inner.data;
        this.consume(); // consume the token stored in inner
        this.expect(optional ? "]" : "}");
        return new ParseFuncOrArgument(
            new ParseNode(innerMode, data, outerMode),
            false);
    }

    if (innerMode === "text") {
        // text mode is special because it should ignore the whitespace
        // before it
        var whitespace = this.lexer.lex(this.pos, "whitespace");
        this.pos = whitespace.position;
    }

    // By the time we get here, innerMode is one of "text" or "math".
    // We switch the mode of the parser, recurse, then restore the old mode.
    // The look-ahead token is re-lexed on both switches because it depends
    // on the mode.
    this.mode = innerMode;
    this.nextToken = this.lexer.lex(this.pos, innerMode);
    var res = optional ? this.parseOptionalGroup() : this.parseGroup();
    this.mode = outerMode;
    this.nextToken = this.lexer.lex(this.pos, outerMode);
    return res;
};
/**
 * Parses a group, which is either a single nucleus (like "x") or an expression
 * in braces (like "{x+y}")
 *
 * @return {?ParseFuncOrArgument}
 */
Parser.prototype.parseGroup = function() {
    if (this.nextToken.text !== "{") {
        // No open brace: just return a nucleus
        return this.parseSymbol();
    }

    // Skip the open brace, parse the expression inside it and make sure it
    // is followed by a close brace.
    this.consume();
    var expression = this.parseExpression(false);
    this.expect("}");
    return new ParseFuncOrArgument(
        new ParseNode("ordgroup", expression, this.mode),
        false);
};
/**
 * Parses a group, which is an expression in brackets (like "[x+y]")
 *
 * @return {?ParseFuncOrArgument} null when the next token is not "["
 */
Parser.prototype.parseOptionalGroup = function() {
    if (this.nextToken.text !== "[") {
        // No open bracket, so there is no optional group here
        return null;
    }

    // Skip the open bracket, parse an expression that stops at "]" and make
    // sure the close bracket is really there.
    this.consume();
    var expression = this.parseExpression(false, "]");
    this.expect("]");
    return new ParseFuncOrArgument(
        new ParseNode("ordgroup", expression, this.mode),
        false);
};
/**
 * Parse a single symbol out of the string. Here, we handle both the functions
 * we have defined, as well as the single character symbols.
 *
 * Any other token except "EOF" and "{" is accepted too: it is consumed and
 * wrapped in a node that borrows the group of the math symbol "\sigma".
 *
 * @return {?ParseFuncOrArgument} null (without consuming anything) for "EOF"
 *         and "{" when they are neither a function nor a symbol
 */
Parser.prototype.parseSymbol = function() {
    var nucleus = this.nextToken;
    var text = nucleus.text;

    if (functions[text]) {
        this.consume();
        // If there exists a function with this name, we return the function
        // name and say that it is a function.
        return new ParseFuncOrArgument(text, true);
    }

    var symbol = symbols[this.mode][text];
    if (symbol) {
        this.consume();
        // Otherwise if this is a no-argument function, find the type it
        // corresponds to in the symbols map
        return new ParseFuncOrArgument(
            new ParseNode(symbol.group, text, this.mode),
            false);
    }

    if (text === "EOF" || text === "{") {
        return null;
    }

    // Unknown token: keep it rather than failing.
    this.consume();
    return new ParseFuncOrArgument(
        new ParseNode(symbols["math"]["\\sigma"].group, text, this.mode),
        false);
};
// Expose the node constructor on every parser instance.
Parser.prototype.ParseNode = ParseNode;

module.exports = Parser;