You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1754 lines
57 KiB
1754 lines
57 KiB
// Jison, an LR(0), SLR(1), LALR(1), LR(1) Parser Generator
|
|
// Zachary Carter <zach@carter.name>
|
|
// MIT X Licensed
|
|
|
|
var typal = require('./util/typal').typal;
|
|
var Set = require('./util/set').Set;
|
|
|
|
var Jison = exports.Jison = exports;
|
|
|
|
// detect print
|
|
// Choose the diagnostic output function, in order of preference:
// console.log, a global `puts`, a global `print`, otherwise a silent no-op.
if (typeof console !== 'undefined' && console.log) {
    // Wrap instead of aliasing console.log: Jison.print is later invoked with
    // a null receiver (Jison.print.call(null, ...)), and some hosts reject a
    // console.log call whose `this` is not the console object.
    Jison.print = function print () { console.log.apply(console, arguments); };
} else if (typeof puts !== 'undefined') {
    Jison.print = function print () { puts([].join.call(arguments, ' ')); };
} else if (typeof print !== 'undefined') {
    Jison.print = print;
} else {
    Jison.print = function print () {};
}
|
|
|
|
Jison.Parser = (function () {
|
|
|
|
// iterator utility
|
|
/**
 * Iterates over a collection.
 *
 * If `obj` has a `forEach` method it is delegated to; otherwise `func` is
 * called as `func.call(obj, value, key, obj)` for every own enumerable
 * property of `obj`.
 *
 * @param {Object|Array} obj  collection to iterate
 * @param {Function} func     callback
 */
function each (obj, func) {
    if (obj.forEach) {
        obj.forEach(func);
        return;
    }
    // Borrow hasOwnProperty so prototype-less objects (Object.create(null))
    // and objects that shadow `hasOwnProperty` are handled too.
    var hasOwn = Object.prototype.hasOwnProperty;
    for (var p in obj) {
        if (hasOwn.call(obj, p)) {
            func.call(obj, obj[p], p, obj);
        }
    }
}
|
|
|
|
// A grammar nonterminal: its symbol name, its productions (held in the
// project's util Set, not the ES built-in), and the lookahead data that the
// lookahead mixin fills in (nullable flag, FIRST and FOLLOW lists).
var Nonterminal = typal.construct({
    constructor: function Nonterminal (symbol) {
        this.symbol = symbol;
        this.productions = new Set();
        this.first = [];
        this.follows = [];
        this.nullable = false;
    },
    // Multi-line dump used by the debug traces.
    toString: function Nonterminal_toString () {
        var str = this.symbol+"\n";
        str += (this.nullable ? 'nullable' : 'not nullable');
        str += "\nFirsts: "+this.first.join(', ');
        // Report the FOLLOW list here; the FIRST list is already printed above.
        str += "\nFollows: "+this.follows.join(', ');
        str += "\nProductions:\n "+this.productions.join('\n ');

        return str;
    }
});
|
|
|
|
// A single grammar rule `symbol -> handle`.
//   symbol     left-hand side nonterminal name
//   handle     array of right-hand side symbol names
//   id         production number (also used as the `case` label of its action)
//   nullable   set by the nullable fixed-point computation
//   first      FIRST list of the handle, set by firstSets
//   precedence 0 means "no precedence assigned"
var Production = typal.construct({
    constructor: function Production (symbol, handle, id) {
        this.symbol = symbol;
        this.handle = handle;
        this.nullable = false;
        this.id = id;
        this.first = [];
        this.precedence = 0;
    },
    // Renders the rule as "lhs -> sym sym ...".
    toString: function Production_toString () {
        return this.symbol+" -> "+this.handle.join(' ');
    }
});
|
|
|
|
// Base object for the parser generators, created with typal.beget(); the
// generator methods below are attached to it.
var generator = typal.beget();
|
|
|
|
/**
 * Initializes a generator from a grammar description and processes the grammar.
 *
 * @param {Object} grammar  grammar definition; reads `options`, `parseParams`,
 *                          `actionInclude`, `moduleInclude` here and is then
 *                          handed to processGrammar
 * @param {Object} [opt]    options mixed over `grammar.options`
 */
generator.constructor = function Jison_Generator (grammar, opt) {
    var options = typal.mix.call({}, grammar.options, opt);

    this.terms = {};
    this.operators = {};
    this.productions = [];
    this.conflicts = 0;
    this.resolutions = [];
    this.options = options;
    this.parseParams = grammar.parseParams;
    this.yy = {}; // accessed as yy free variable in the parser/lexer actions

    // source included in semantic action execution scope
    var actionInclude = grammar.actionInclude;
    if (actionInclude) {
        if (typeof actionInclude === 'function') {
            // Keep only the function body. Work on a local copy so the
            // caller's grammar object is not modified.
            actionInclude = String(actionInclude)
                .replace(/^\s*function \(\) \{/, '')
                .replace(/\}\s*$/, '');
        }
        this.actionInclude = actionInclude;
    }
    this.moduleInclude = grammar.moduleInclude || '';

    this.DEBUG = options.debug || false;
    if (this.DEBUG) this.mix(generatorDebug); // mixin debug methods

    this.processGrammar(grammar);
};
|
|
|
|
/**
 * Turns the grammar description into the generator's internal tables:
 * operators, productions, symbols and the augmented start rule.
 *
 * @param {Object} grammar  reads `bnf`, `tokens` (string or array) and
 *                          `operators`; passed on to augmentGrammar
 */
generator.processGrammar = function processGrammarDef (grammar) {
    var bnf = grammar.bnf,
        tokens = grammar.tokens,
        nonterminals = this.nonterminals = {},
        productions = this.productions;

    if (tokens) {
        // Split on any whitespace run so repeated blanks or newlines in the
        // declaration do not produce empty token names (which would skew the
        // length comparison below).
        tokens = (typeof tokens === 'string') ?
            tokens.trim().split(/\s+/) :
            tokens.slice(0);
    }

    var symbols = this.symbols = [];

    // calculate precedence of operators
    var operators = this.operators = processOperators(grammar.operators);

    // build productions from cfg
    this.buildProductions(bnf, productions, nonterminals, symbols, operators);

    // Only the counts are compared; the two lists are traced for inspection.
    if (tokens && this.terminals.length !== tokens.length) {
        this.trace("Warning: declared tokens differ from tokens found in rules.");
        this.trace(this.terminals);
        this.trace(tokens);
    }

    // augment the grammar
    this.augmentGrammar(grammar);
};
|
|
|
|
/**
 * Adds the `$accept -> start $end` rule and the `$accept` / `$end` symbols.
 *
 * @param {Object} grammar  reads `start` / `startSymbol`
 * @throws {Error} when there are no productions, or the start symbol is not a
 *                 nonterminal of the grammar
 */
generator.augmentGrammar = function augmentGrammar (grammar) {
    if (this.productions.length === 0) {
        throw new Error("Grammar error: must have at least one rule.");
    }
    // use specified start symbol, or default to first user defined production
    this.startSymbol = grammar.start || grammar.startSymbol || this.productions[0].symbol;
    if (!this.nonterminals[this.startSymbol]) {
        throw new Error("Grammar error: startSymbol must be a non-terminal found in your grammar.");
    }
    this.EOF = "$end";

    // augment the grammar; production 0 is the accept rule
    var acceptProduction = new Production('$accept', [this.startSymbol, this.EOF], 0);
    this.productions.unshift(acceptProduction);

    // prepend parser tokens: $accept is symbol 0, end-of-input is symbol 1
    this.symbols.unshift("$accept", this.EOF);
    this.symbols_.$accept = 0;
    this.symbols_[this.EOF] = 1;
    this.terminals.unshift(this.EOF);

    var acceptNonterminal = new Nonterminal("$accept");
    acceptNonterminal.productions.push(acceptProduction);
    this.nonterminals.$accept = acceptNonterminal;

    // add follow $ to start symbol
    this.nonterminals[this.startSymbol].follows.push(this.EOF);
};
|
|
|
|
// set precedence and associativity of operators
|
|
/**
 * Builds the operator table from the grammar's precedence declarations.
 *
 * @param {Array<Array<string>>} [ops]  rows of `[assoc, token, token, ...]`,
 *        lowest precedence first; iteration stops at the first falsy row
 * @returns {Object} map token -> `{precedence, assoc}` where precedence is the
 *        1-based row number; empty object when `ops` is falsy
 */
function processOperators (ops) {
    var operators = {};
    if (!ops) return operators;

    for (var level = 0, row; (row = ops[level]); level++) {
        var assoc = row[0];
        for (var k = 1; k < row.length; k++) {
            operators[row[k]] = {precedence: level + 1, assoc: assoc};
        }
    }
    return operators;
}
|
|
|
|
|
|
/**
 * Builds productions, symbol tables and the `performAction` source from the
 * grammar's BNF section.
 *
 * Fills the passed-in `productions`, `nonterminals` and `symbols` containers
 * and sets on `this`: hasErrorRecovery, terminals, terminals_, symbols_,
 * productions_ and performAction (a string of JavaScript source).
 *
 * @param {Object} bnf           nonterminal -> string of `|`-separated
 *                               alternatives, or array of handles; a handle is
 *                               a string or `[rhs, action?, {prec}?]`
 * @param {Array}  productions   receives the Production objects
 * @param {Object} nonterminals  receives a Nonterminal per BNF key
 * @param {Array}  symbols       receives every symbol name, in id order
 * @param {Object} operators     table produced by processOperators
 */
generator.buildProductions = function buildProductions(bnf, productions, nonterminals, symbols, operators) {
    // Because of the switch limits in v8 this should probably be split into several methods for the different ranges

    var hasOwn = Object.prototype.hasOwnProperty;
    // matches an alias suffix such as `[name]` on a right-hand side symbol
    var aliasPattern = /\[[a-zA-Z_][a-zA-Z0-9_-]*\]/g;

    var actions = [
        '/* self == yyval */',
        this.actionInclude || '',
        'var $0 = $$.length - 1;',
        'switch (yystate) {'
    ];

    var actionGroups = {};      // action source -> list of `case N:` labels
    var prods, symbol;          // `symbol` is read by buildProduction below
    var productions_ = [0];
    var symbolId = 1;
    var symbols_ = {};

    var her = false; // has error recovery

    // add error symbol; will be third symbol, or "2" ($accept, $end, error)
    addSymbol("error");

    for (symbol in bnf) {
        if (!hasOwn.call(bnf, symbol)) continue;

        addSymbol(symbol);
        nonterminals[symbol] = new Nonterminal(symbol);

        prods = (typeof bnf[symbol] === 'string') ?
            bnf[symbol].split(/\s*\|\s*/g) :
            bnf[symbol].slice(0);

        prods.forEach(buildProduction);
    }

    // productions that share identical action code share one switch arm
    for (var action in actionGroups) {
        if (hasOwn.call(actionGroups, action)) {
            actions.push(actionGroups[action].join(' '), action, 'break;');
        }
    }

    // every symbol that is not a nonterminal is a terminal
    var terms = [], terms_ = {};
    each(symbols_, function (id, sym) {
        if (!nonterminals[sym]) {
            terms.push(sym);
            terms_[id] = sym;
        }
    });

    this.hasErrorRecovery = her;

    this.terminals = terms;
    this.terminals_ = terms_;
    this.symbols_ = symbols_;

    this.productions_ = productions_;
    actions.push('}');

    actions = actions.join("\n")
        .replace(/YYABORT/g, 'return false')
        .replace(/YYACCEPT/g, 'return true');

    var parameters = "self, yytext, yy, yystate /* action[1] */, $$ /* vstack */";
    if (this.parseParams) parameters += ', ' + this.parseParams.join(', ');

    this.performAction = "function performAction(" + parameters + ") {\n" + actions + "\n}";

    // Removes alias suffixes from a symbol name (or a whole handle string).
    function stripAliases (name) {
        return name.replace(aliasPattern, '');
    }

    // Assigns the next id to a symbol name not seen before; ignores ''.
    function addSymbol (s) {
        if (s && !symbols_[s]) {
            symbols_[s] = ++symbolId;
            symbols.push(s);
        }
    }

    // Registers every symbol of a right-hand side and notes use of `error`.
    // Names are registered without their alias suffix so that `id[alias]`
    // yields the symbol `id`, matching the names stored in the production.
    function registerSymbols (rhs) {
        for (var n = 0; n < rhs.length; n++) {
            var name = stripAliases(rhs[n]);
            if (name === 'error') her = true;
            addSymbol(name);
        }
    }

    // Builds one Production for the current `symbol` from a handle.
    function buildProduction (handle) {
        var r, rhs, i;
        if (handle.constructor === Array) {
            rhs = (typeof handle[0] === 'string') ?
                handle[0].trim().split(' ') :
                handle[0].slice(0);

            registerSymbols(rhs);

            if (typeof handle[1] === 'string' || handle.length === 3) {
                // semantic action specified
                var label = 'case ' + (productions.length+1) + ':', action = handle[1];

                // replace named semantic values ($nonterminal)
                if (action.match(/[$@][a-zA-Z][a-zA-Z0-9_]*/)) {
                    var count = {},
                        names = {};
                    for (i=0;i<rhs.length;i++) {
                        // check for aliased names, e.g., id[alias]
                        var rhs_i = rhs[i].match(/\[[a-zA-Z][a-zA-Z0-9_-]*\]/);
                        if (rhs_i) {
                            rhs_i = rhs_i[0].substr(1, rhs_i[0].length-2);
                            rhs[i] = rhs[i].substr(0, rhs[i].indexOf('['));
                        } else {
                            rhs_i = rhs[i];
                        }

                        // repeated names are addressable as name1, name2, ...
                        if (names[rhs_i]) {
                            names[rhs_i + (++count[rhs_i])] = i+1;
                        } else {
                            names[rhs_i] = i+1;
                            names[rhs_i + "1"] = i+1;
                            count[rhs_i] = 1;
                        }
                    }
                    action = action.replace(/\$([a-zA-Z][a-zA-Z0-9_]*)/g, function (str, pl) {
                        return names[pl] ? '$'+names[pl] : str;
                    }).replace(/@([a-zA-Z][a-zA-Z0-9_]*)/g, function (str, pl) {
                        return names[pl] ? '@'+names[pl] : str;
                    });
                }
                action = action
                    // replace references to $$ with self.$, and @$ with self._$
                    .replace(/([^'"])\$\$|^\$\$/g, '$1self.$').replace(/@[0$]/g, "self._$")

                    // replace semantic value references ($n) with stack value (stack[n])
                    .replace(/\$(-?\d+)/g, function (_, n) {
                        return "$$[$0" + (parseInt(n, 10) - rhs.length || '') + "]";
                    })
                    // same as above for location references (@n)
                    .replace(/@(-?\d+)/g, function (_, n) {
                        return "_$[$0" + (n - rhs.length || '') + "]";
                    });

                if (hasOwn.call(actionGroups, action)) actionGroups[action].push(label);
                else actionGroups[action] = [label];

                // done with aliases; strip them.
                rhs = rhs.map(stripAliases);
                r = new Production(symbol, rhs, productions.length+1);
                // precedence specified also
                if (handle[2] && operators[handle[2].prec]) {
                    r.precedence = operators[handle[2].prec].precedence;
                }
            } else {
                // no action -> don't care about aliases; strip them.
                rhs = rhs.map(stripAliases);
                r = new Production(symbol, rhs, productions.length+1);
                // only precedence specified; a one-element handle has no
                // second entry, so guard before reading `.prec`
                if (handle[1] && operators[handle[1].prec]) {
                    r.precedence = operators[handle[1].prec].precedence;
                }
            }
        } else {
            // no action -> don't care about aliases; strip them.
            handle = stripAliases(handle);
            rhs = handle.trim().split(' ');
            registerSymbols(rhs);
            r = new Production(symbol, rhs, productions.length+1);
        }
        if (r.precedence === 0) {
            // set precedence from the operator symbols of the handle.
            // NOTE(review): the scan runs right-to-left without stopping, so
            // the leftmost operator wins; kept as is because existing
            // grammars may rely on it.
            for (i=r.handle.length-1; i>=0; i--) {
                if (!(r.handle[i] in nonterminals) && r.handle[i] in operators) {
                    r.precedence = operators[r.handle[i]].precedence;
                }
            }
        }

        productions.push(r);
        // [lhs symbol id, handle length]; an epsilon handle ('') has length 0
        productions_.push([symbols_[r.symbol], r.handle[0] === '' ? 0 : r.handle.length]);
        nonterminals[symbol].productions.push(r);
    }
};
|
|
|
|
|
|
|
|
/**
 * Abstract hook; this base implementation always throws.
 *
 * @throws {Error} always
 */
generator.createParser = function createParser () {
    throw new Error('Calling abstract method.');
};
|
|
|
|
// noop. implemented in debug mixin
|
|
// Default trace does nothing; generatorDebug.trace replaces it in debug mode.
// The function's source text is emitted into generated modules via
// String(this.trace || parser.trace), so keep its body free of comments.
generator.trace = function trace () { };
|
|
|
|
/**
 * Prints a warning through Jison.print. All arguments are concatenated
 * without a separator into a single message.
 */
generator.warn = function warn () {
    var message = Array.prototype.join.call(arguments, "");
    Jison.print.call(null, message);
};
|
|
|
|
/**
 * Raises a generator error.
 *
 * @param {string} msg  error message
 * @throws {Error} always, carrying `msg`
 */
generator.error = function error (msg) {
    throw new Error(msg);
};
|
|
|
|
// Generator debug mixin
|
|
|
|
// Debug methods mixed into the generator when the `debug` option is set.
// The before*/after* members are presumably hooks that typal's mix wires
// around the method of the same name — confirm against util/typal.
var generatorDebug = {
    // Forwards all arguments to Jison.print.
    trace: function trace () {
        Jison.print.apply(null, arguments);
    },
    beforeprocessGrammar: function () {
        this.trace("Processing grammar.");
    },
    // Lists every symbol with its index in this.symbols.
    afteraugmentGrammar: function () {
        // Call trace as a method so a replacement that relies on `this`
        // keeps working.
        var self = this;
        each(this.symbols, function (sym, i) {
            self.trace(sym+"("+i+")");
        });
    }
};
|
|
|
|
|
|
|
|
/*
|
|
* Mixin for common behaviors of lookahead parsers
|
|
* */
|
|
// Container for the nullable / FIRST / FOLLOW computations defined below.
var lookaheadMixin = {};
|
|
|
|
/**
 * Computes the nullable, FIRST and FOLLOW information, in that order.
 * The method replaces itself with a no-op on the instance, so the
 * computation runs once per generator.
 */
lookaheadMixin.computeLookaheads = function computeLookaheads () {
    if (this.DEBUG) this.mix(lookaheadDebug); // mixin debug methods

    // later calls on this instance do nothing
    this.computeLookaheads = function () {};

    this.nullableSets();
    this.firstSets();
    this.followSets();
};
|
|
|
|
// calculate follow sets based on first and nullable
|
|
/**
 * Fixed-point calculation of the FOLLOW lists of all nonterminals.
 *
 * For every nonterminal occurrence `A -> alpha B beta`, FIRST(beta) is added
 * to FOLLOW(B), and FOLLOW(A) as well when beta is nullable. When the
 * generator provides `go_` (used by the "Simple LALR" variant), FOLLOW(A) is
 * only propagated if `go_(A, alpha)` equals the numeric value of `nterms_[B]`.
 * Requires nullable and FIRST data to be computed already.
 */
lookaheadMixin.followSets = function followSets () {
    var productions = this.productions,
        nonterminals = this.nonterminals,
        self = this,
        cont = true;

    // Propagates follow information through one production; sets `cont`
    // when any FOLLOW list grew.
    function propagate (production) {
        var ctx = !!self.go_;

        // the loop ends at the first falsy handle entry (e.g. '' for epsilon)
        for (var i = 0, t; (t = production.handle[i]); ++i) {
            if (!nonterminals[t]) continue;

            var inContext = true;
            if (ctx) {
                var q = self.go_(production.symbol, production.handle.slice(0, i));
                inContext = (q === parseInt(self.nterms_[t], 10));
            }

            // An empty remainder is covered here too: first([]) is empty and
            // nullable([]) is true, so only FOLLOW(lhs) is contributed.
            var part = production.handle.slice(i+1);
            var set = self.first(part);
            if (self.nullable(part) && inContext) {
                set.push.apply(set, nonterminals[production.symbol].follows);
            }

            var oldcount = nonterminals[t].follows.length;
            Set.union(nonterminals[t].follows, set);
            if (oldcount !== nonterminals[t].follows.length) {
                cont = true;
            }
        }
    }

    // loop until no further changes have been made
    while (cont) {
        cont = false;
        productions.forEach(propagate);
    }
};
|
|
|
|
// return the FIRST set of a symbol or series of symbols
|
|
/**
 * Returns the FIRST list of a symbol or of a sequence of symbols.
 *
 * @param {string|Array<string>} symbol  '' (epsilon), a symbol name, or a
 *        right-hand side
 * @returns {Array<string>} terminals that can start the input. For a single
 *        nonterminal the stored list itself is returned (not a copy); for a
 *        sequence a new array is built.
 */
lookaheadMixin.first = function first (symbol) {
    // epsilon
    if (symbol === '') {
        return [];
    }

    // RHS: accumulate while the symbols seen so far are nullable
    if (symbol instanceof Array) {
        var firsts = [];
        for (var i = 0, t; (t = symbol[i]); ++i) {
            var entry = this.nonterminals[t];
            if (entry) {
                Set.union(firsts, entry.first);
            } else if (firsts.indexOf(t) === -1) {
                firsts.push(t);
            }
            if (!this.nullable(t)) break;
        }
        return firsts;
    }

    // single symbol: a terminal starts with itself
    var nonterminal = this.nonterminals[symbol];
    return nonterminal ? nonterminal.first : [symbol];
};
|
|
|
|
// fixed-point calculation of FIRST sets
|
|
/**
 * Fixed-point calculation of the FIRST lists of all productions and
 * nonterminals. A list is replaced whenever the newly computed one has a
 * different length; iteration stops once a full pass changes nothing.
 */
lookaheadMixin.firstSets = function firstSets () {
    var productions = this.productions,
        nonterminals = this.nonterminals,
        self = this,
        cont = true;

    // Recomputes FIRST of one production's handle.
    function refreshProduction (production) {
        var firsts = self.first(production.handle);
        if (firsts.length !== production.first.length) {
            production.first = firsts;
            cont = true;
        }
    }

    // Union of the FIRST lists of all productions of a nonterminal.
    function collectFirsts (nonterminal) {
        var firsts = [];
        nonterminal.productions.forEach(function (production) {
            Set.union(firsts, production.first);
        });
        return firsts;
    }

    // loop until no further changes have been made
    while (cont) {
        cont = false;

        productions.forEach(refreshProduction);

        for (var symbol in nonterminals) {
            var firsts = collectFirsts(nonterminals[symbol]);
            if (firsts.length !== nonterminals[symbol].first.length) {
                nonterminals[symbol].first = firsts;
                cont = true;
            }
        }
    }
};
|
|
|
|
// fixed-point calculation of NULLABLE
|
|
/**
 * Fixed-point calculation of the `nullable` flags of productions and
 * nonterminals. Also resets `this.firsts` to an empty object.
 */
lookaheadMixin.nullableSets = function nullableSets () {
    var nonterminals = this.nonterminals,
        self = this,
        cont = true;

    this.firsts = {};

    // loop until no further changes have been made
    while (cont) {
        cont = false;

        // check if each production is nullable
        this.productions.forEach(function (production) {
            if (production.nullable) return;

            // Count nullable symbols; the scan ends at the first falsy entry,
            // so an epsilon handle ('') ends with i === 0 and n === 0.
            for (var i = 0, n = 0, t; (t = production.handle[i]); ++i) {
                if (self.nullable(t)) n++;
            }
            if (n === i) { // production is nullable if all tokens are nullable
                production.nullable = cont = true;
            }
        });

        // check if each symbol is nullable
        for (var symbol in nonterminals) {
            if (this.nullable(symbol)) continue;

            var prods = nonterminals[symbol].productions;
            for (var j = 0, production; (production = prods.item(j)); j++) {
                if (production.nullable)
                    nonterminals[symbol].nullable = cont = true;
            }
        }
    }
};
|
|
|
|
// check if a token or series of tokens is nullable
|
|
/**
 * Tells whether a symbol or a sequence of symbols can derive the empty string.
 *
 * @param {string|Array<string>} symbol  '' (epsilon), a symbol name, or a
 *        right-hand side
 * @returns {boolean} true for epsilon, for a sequence whose scanned symbols
 *        are all nullable, and for a nonterminal flagged nullable; false for
 *        terminals
 */
lookaheadMixin.nullable = function nullable (symbol) {
    // epsilon
    if (symbol === '') {
        return true;
    }

    // RHS: the scan ends at the first falsy entry
    if (symbol instanceof Array) {
        for (var i = 0, t; (t = symbol[i]); ++i) {
            if (!this.nullable(t)) return false;
        }
        return true;
    }

    // terminal -> false, nonterminal -> its computed flag
    var nonterminal = this.nonterminals[symbol];
    return nonterminal ? nonterminal.nullable : false;
};
|
|
|
|
|
|
// lookahead debug mixin
|
|
// Debug methods mixed in by computeLookaheads when DEBUG is set.
// The before*/after* members are presumably hooks that typal's mix wires
// around the method of the same name — confirm against util/typal.
var lookaheadDebug = {
    beforenullableSets: function () {
        this.trace("Computing Nullable sets.");
    },
    beforefirstSets: function () {
        this.trace("Computing First sets.");
    },
    beforefollowSets: function () {
        this.trace("Computing Follow sets.");
    },
    // Dumps every nonterminal once the FOLLOW computation is done.
    afterfollowSets: function () {
        // Call trace as a method so a replacement that relies on `this`
        // keeps working.
        var self = this;
        each(this.nonterminals, function (nt, t) {
            self.trace(nt, '\n');
        });
    }
};
|
|
|
|
/*
|
|
* Mixin for common LR parser behavior
|
|
* */
|
|
// Container for the LR table construction and code generation methods below.
var lrGeneratorMixin = {};
|
|
|
|
/**
 * Builds the parser tables: the collection of item sets (`this.states`), the
 * action/goto table (`this.table`) and the default actions
 * (`this.defaultActions`).
 */
lrGeneratorMixin.buildTable = function buildTable () {
    if (this.DEBUG) this.mix(lrGeneratorDebug); // mixin debug methods

    this.states = this.canonicalCollection();
    this.table = this.parseTable(this.states);
    this.defaultActions = findDefaults(this.table);
};
|
|
|
|
// An LR item: a production with a dot position and optional lookaheads.
//   production    the Production this item refers to
//   dotPosition   index of the dot within the handle (defaults to 0)
//   follows       lookahead symbols (defaults to a new empty array)
//   predecessor   stored as given; gotoOperation passes the index of the
//                 originating item
//   id            number derived from production id and dot position; two
//                 items are equal (`eq`) when their ids match
//   markedSymbol  symbol right after the dot, undefined when the dot is at
//                 the end of the handle
lrGeneratorMixin.Item = typal.construct({
    constructor: function Item(production, dot, f, predecessor) {
        this.production = production;
        this.dotPosition = dot || 0;
        this.follows = f || [];
        this.predecessor = predecessor;
        // "<id>a<dot>" read as a base-36 number
        this.id = parseInt(production.id+'a'+this.dotPosition, 36);
        this.markedSymbol = this.production.handle[this.dotPosition];
    },
    // Symbols after the marked symbol.
    remainingHandle: function () {
        return this.production.handle.slice(this.dotPosition+1);
    },
    eq: function (e) {
        return e.id === this.id;
    },
    // Handle with a '.' prefixed to the marked symbol, e.g. "a .b c".
    handleToString: function () {
        var handle = this.production.handle.slice(0);
        handle[this.dotPosition] = '.'+(handle[this.dotPosition]||'');
        return handle.join(' ');
    },
    // "lhs -> dotted handle", followed by the lookaheads when there are any.
    toString: function () {
        var lookaheads = this.follows.length === 0 ?
            "" :
            " #lookaheads= "+this.follows.join(' ');
        return this.production.symbol+" -> "+this.handleToString() + lookaheads;
    }
});
|
|
|
|
// A set of LR items (one parser state), derived from the project's util Set.
// `hash_` is a lookup table keyed by item id, kept in step with `_items` by
// concat and push.
lrGeneratorMixin.ItemSet = Set.prototype.construct({
    // Presumably run by typal after the Set constructor — confirm against
    // util/typal. Initializes the per-state bookkeeping.
    afterconstructor: function () {
        this.reductions = [];
        this.goes = {};
        this.edges = {};
        this.shifts = false;
        this.inadequate = false;
        this.hash_ = {};
        for (var i = this._items.length - 1; i >= 0; i--) {
            this.hash_[this._items[i].id] = true;
        }
    },
    // Appends all items of another set (or plain array); returns this.
    concat: function concat (set) {
        var a = set._items || set;
        for (var i = a.length - 1; i >= 0; i--) {
            this.hash_[a[i].id] = true;
        }
        this._items.push.apply(this._items, a);
        return this;
    },
    // Appends one item; returns the new length of the item array.
    push: function (item) {
        this.hash_[item.id] = true;
        return this._items.push(item);
    },
    // Truthy when an item with the same id is in the set (true or undefined).
    contains: function (item) {
        return this.hash_[item.id];
    },
    // Key identifying the state: sorted item ids joined with '|'. The value
    // is computed on first use and cached on the instance, so items added
    // afterwards are not reflected.
    valueOf: function toValue () {
        var v = this._items.map(function (a) {return a.id;}).sort().join('|');
        this.valueOf = function toValue_inner() {return v;};
        return v;
    }
});
|
|
|
|
/**
 * Computes the closure of an item set.
 *
 * Starting from `itemSet`, repeatedly adds the dot-0 items of every
 * nonterminal that appears right after a dot. While doing so it records the
 * completed items in `reductions` and maintains the `shifts` and
 * `inadequate` flags of the result.
 *
 * @param {ItemSet} itemSet  kernel items
 * @returns {ItemSet} a new item set containing the kernel and closure items
 */
lrGeneratorMixin.closureOperation = function closureOperation (itemSet /*, closureSet*/) {
    var closureSet = new this.ItemSet();
    var self = this;
    var expanded = {};      // nonterminals whose productions were already added
    var pending = itemSet;  // items added in the previous round
    var itemQueue;          // items discovered in the current round

    function visit (item) {
        var symbol = item.markedSymbol;

        if (!symbol) {
            // reduction
            closureSet.reductions.push(item);
            closureSet.inadequate = closureSet.reductions.length > 1 || closureSet.shifts;
            return;
        }

        if (!self.nonterminals[symbol]) {
            // shift
            closureSet.shifts = true;
            closureSet.inadequate = closureSet.reductions.length > 0;
            return;
        }

        // nonterminal after the dot: queue its productions once
        if (expanded[symbol]) return;
        self.nonterminals[symbol].productions.forEach(function CO_nt_forEach (production) {
            var newItem = new self.Item(production, 0);
            if (!closureSet.contains(newItem))
                itemQueue.push(newItem);
        });
        expanded[symbol] = true;
    }

    do {
        itemQueue = new Set();
        closureSet.concat(pending);
        pending.forEach(visit);
        pending = itemQueue;
    } while (!itemQueue.isEmpty());

    return closureSet;
};
|
|
|
|
/**
 * Computes the state reached from `itemSet` on `symbol`.
 *
 * @param {ItemSet} itemSet  source state
 * @param {string}  symbol   grammar symbol to advance over
 * @returns {ItemSet} the closure of all items with the dot moved over
 *          `symbol`; an empty item set when no item has `symbol` after the dot
 */
lrGeneratorMixin.gotoOperation = function gotoOperation (itemSet, symbol) {
    var gotoSet = new this.ItemSet(),
        Item = this.Item;

    itemSet.forEach(function goto_forEach (item, n) {
        if (item.markedSymbol === symbol) {
            // the advanced item keeps the lookaheads and records the index of
            // the item it came from
            gotoSet.push(new Item(item.production, item.dotPosition+1, item.follows, n));
        }
    });

    return gotoSet.isEmpty() ? gotoSet : this.closureOperation(gotoSet);
};
|
|
|
|
/* Create unique set of item sets
|
|
* */
|
|
/**
 * Builds the collection of item sets (parser states), starting from the
 * closure of `$accept -> .start $end`.
 *
 * @returns {Set} the states, in discovery order; `states.has` maps a state's
 *          `valueOf()` key to its index
 */
lrGeneratorMixin.canonicalCollection = function canonicalCollection () {
    var item1 = new this.Item(this.productions[0], 0, [this.EOF]);
    var firstState = this.closureOperation(new this.ItemSet(item1)),
        states = new Set(firstState),
        marked = 0,
        self = this;

    states.has = {};
    // Key the first state the same way canonicalCollectionInsert looks states
    // up (valueOf()) instead of relying on implicit string coercion.
    states.has[firstState.valueOf()] = 0;

    // Tries the transition on every marked symbol of one state.
    function expand (itemSet, stateNum) {
        itemSet.forEach(function CC_itemSet_forEach (item) {
            if (item.markedSymbol && item.markedSymbol !== self.EOF)
                self.canonicalCollectionInsert(item.markedSymbol, itemSet, states, stateNum);
        });
    }

    // states appended during expansion are processed in later iterations
    while (marked !== states.size()) {
        expand(states.item(marked), marked);
        marked++;
    }

    return states;
};
|
|
|
|
// Pushes a unique state into the queue. Some parsing algorithms may perform additional operations
|
|
/**
 * Computes the goto state of `itemSet` on `symbol`, appends it to `states`
 * if it is new, and records the transition in `itemSet.edges`.
 *
 * @param {string}  symbol    symbol to advance over
 * @param {ItemSet} itemSet   source state
 * @param {Set}     states    collection being built; `states.has` maps state
 *                            keys to indices
 * @param {number}  stateNum  index of `itemSet` in `states`
 */
lrGeneratorMixin.canonicalCollectionInsert = function canonicalCollectionInsert (symbol, itemSet, states, stateNum) {
    var g = this.gotoOperation(itemSet, symbol);
    if (!g.predecessors) {
        g.predecessors = {};
    }

    // nothing to add when no item advances over the symbol
    if (g.isEmpty()) return;

    var gv = g.valueOf(),
        i = states.has[gv];

    if (i === -1 || typeof i === 'undefined') {
        // new state: it takes the next free index
        var index = states.size();
        states.has[gv] = index;
        itemSet.edges[symbol] = index; // store goto transition for table
        states.push(g);
        g.predecessors[symbol] = [stateNum];
    } else {
        // known state: point at it and note one more predecessor
        itemSet.edges[symbol] = i; // store goto transition for table
        states.item(i).predecessors[symbol].push(stateNum);
    }
};
|
|
|
|
// Sentinel that resolveConflict stores as the resolved action for a
// non-associative operator; parseTable clears the table entry when it sees it.
var NONASSOC = 0;
|
|
/**
 * Builds the action/goto table from the collection of item sets.
 *
 * Each state is an object keyed by symbol id. Values are a state number
 * (goto on a nonterminal), `[1, state]` (shift), `[2, productionId]` (reduce)
 * or `[3]` (accept). Conflicts are resolved with resolveConflict, appended to
 * `this.resolutions`, counted in `this.conflicts` when resolved by default,
 * and reported through `warn` unless DEBUG is set.
 *
 * @param {Set} itemSets  states produced by canonicalCollection
 * @returns {Array<Object>} the table, indexed by state number
 */
lrGeneratorMixin.parseTable = function parseTable (itemSets) {
    var states = [],
        nonterminals = this.nonterminals,
        operators = this.operators,
        conflictedStates = {}, // state number -> true
        self = this,
        s = 1, // shift
        r = 2, // reduce
        a = 3; // accept

    // for each item set
    itemSets.forEach(function (itemSet, k) {
        var state = states[k] = {};
        var stackSymbol;

        // set shift and goto actions
        for (stackSymbol in itemSet.edges) {
            itemSet.forEach(function (item) {
                // find shift and goto actions
                if (item.markedSymbol == stackSymbol) {
                    var gotoState = itemSet.edges[stackSymbol];
                    if (nonterminals[stackSymbol]) {
                        // store state to go to after a reduce
                        state[self.symbols_[stackSymbol]] = gotoState;
                    } else {
                        state[self.symbols_[stackSymbol]] = [s,gotoState];
                    }
                }
            });
        }

        // set accept action
        itemSet.forEach(function (item) {
            if (item.markedSymbol == self.EOF) {
                // accept
                state[self.symbols_[self.EOF]] = [a];
            }
        });

        // without a lookAheads method, reductions apply to every terminal
        var allterms = self.lookAheads ? false : self.terminals;

        // set reductions and resolve potential conflicts
        itemSet.reductions.forEach(function (item) {
            // if parser uses lookahead, only enumerate those terminals
            var terminals = allterms || self.lookAheads(itemSet, item);

            terminals.forEach(function (stackSymbol) {
                var symbolId = self.symbols_[stackSymbol];
                var action = state[symbolId];
                var op = operators[stackSymbol];

                // Reading a terminal and current position is at the end of a production, try to reduce
                if (action) {
                    // an entry already exists: conflict with the new reduce
                    var sol = resolveConflict(item.production, op, [r,item.production.id], action[0] instanceof Array ? action[0] : action);
                    self.resolutions.push([k,stackSymbol,sol]);
                    if (sol.bydefault) {
                        self.conflicts++;
                        if (!self.DEBUG) {
                            self.warn('Conflict in grammar: multiple actions possible when lookahead token is ',stackSymbol,' in state ',k, "\n- ", printAction(sol.r, self), "\n- ", printAction(sol.s, self));
                            conflictedStates[k] = true;
                        }
                        if (self.options.noDefaultResolve) {
                            // keep every candidate action as a nested list
                            if (!(action[0] instanceof Array))
                                action = [action];
                            action.push(sol.r);
                        }
                    } else {
                        action = sol.action;
                    }
                } else {
                    action = [r,item.production.id];
                }

                if (action && action.length) {
                    state[symbolId] = action;
                } else if (action === NONASSOC) {
                    // non-associative operator: no action for this symbol
                    state[symbolId] = undefined;
                }
            });
        });
    });

    if (!self.DEBUG && self.conflicts > 0) {
        self.warn("\nStates with conflicts:");
        each(conflictedStates, function (val, state) {
            self.warn('State '+state);
            self.warn(' ',itemSets.item(state).join("\n "));
        });
    }

    return states;
};
|
|
|
|
// find states with only one action, a reduction
|
|
/**
 * Finds the states that have exactly one table entry and where that entry is
 * a reduction.
 *
 * @param {Array<Object>} states  parse table, indexed by state number
 * @returns {Object} map state number -> the single reduce action `[2, id]`
 */
function findDefaults (states) {
    var defaults = {};
    var hasOwn = Object.prototype.hasOwnProperty;

    states.forEach(function (state, k) {
        var count = 0;
        var onlyKey;

        // Remember the own key itself; the loop variable may end on an
        // inherited property.
        for (var act in state) {
            if (hasOwn.call(state, act)) {
                count++;
                onlyKey = act;
            }
        }
        if (count !== 1) return;

        // The entry may be undefined (cleared for a non-associative operator)
        // or a plain goto number; neither is a default reduction.
        var action = state[onlyKey];
        if (action && action[0] === 2) {
            // only one action in state and it's a reduction
            defaults[k] = action;
        }
    });

    return defaults;
}
|
|
|
|
// resolves shift-reduce and reduce-reduce conflicts
|
|
/**
 * Resolves a shift/reduce or reduce/reduce conflict.
 *
 * @param {Production} production  production of the new reduce action
 * @param {Object} [op]            operator entry `{precedence, assoc}` of the
 *                                 lookahead token, if it is an operator
 * @param {Array} reduce           the new action `[2, productionId]`
 * @param {Array} shift            the action already in the table (a shift,
 *                                 or another reduce)
 * @returns {Object} `{production, operator, r, s, msg, action, bydefault?}`;
 *          `action` is the chosen action, NONASSOC for a non-associative
 *          operator, or left unset for an unknown associativity;
 *          `bydefault` is true when no precedence rule decided the outcome
 */
function resolveConflict (production, op, reduce, shift) {
    var sln = {production: production, operator: op, r: reduce, s: shift},
        REDUCE = 2;

    if (shift[0] === REDUCE) {
        sln.msg = "Resolve R/R conflict (use first production declared in grammar.)";
        sln.action = shift[1] < reduce[1] ? shift : reduce;
        if (shift[1] !== reduce[1]) sln.bydefault = true;
        return sln;
    }

    if (production.precedence === 0 || !op) {
        sln.msg = "Resolve S/R conflict (shift by default.)";
        sln.bydefault = true;
        sln.action = shift;
    } else if (production.precedence < op.precedence) {
        sln.msg = "Resolve S/R conflict (shift for higher precedent operator.)";
        sln.action = shift;
    } else if (production.precedence > op.precedence) {
        sln.msg = "Resolve conflict (reduce for higher precedent production.)";
        sln.action = reduce;
    } else if (production.precedence === op.precedence) {
        // equal precedence: associativity decides
        if (op.assoc === "right") {
            sln.msg = "Resolve S/R conflict (shift for right associative operator.)";
            sln.action = shift;
        } else if (op.assoc === "left") {
            sln.msg = "Resolve S/R conflict (reduce for left associative operator.)";
            sln.action = reduce;
        } else if (op.assoc === "nonassoc") {
            sln.msg = "Resolve S/R conflict (no action for non-associative operator.)";
            sln.action = NONASSOC;
        }
    } else {
        // precedences that compare neither <, > nor === (e.g. NaN)
        sln.msg = "Resolve conflict (reduce for higher precedent production.)";
        sln.action = reduce;
    }

    return sln;
}
|
|
|
|
/**
 * Generates the parser source in the requested module format.
 *
 * @param {Object} [opt]  options mixed over `this.options`; reads
 *        `moduleName` (replaced by "parser" unless it is a plain identifier)
 *        and `moduleType` ("js", "amd", anything else means CommonJS)
 * @returns {string} generated source code
 */
lrGeneratorMixin.generate = function parser_generate (opt) {
    opt = typal.mix.call({}, this.options, opt);

    // check for illegal identifier
    if (!opt.moduleName || !opt.moduleName.match(/^[A-Za-z_$][A-Za-z0-9_$]*$/)) {
        opt.moduleName = "parser";
    }

    switch (opt.moduleType) {
        case "js":
            return this.generateModule(opt);
        case "amd":
            return this.generateAMDModule(opt);
        default:
            return this.generateCommonJSModule(opt);
    }
};
|
|
|
|
/**
 * Generates the parser wrapped in an AMD `define(function(require){...})`
 * call that returns the parser object.
 *
 * @param {Object} [opt]  options mixed over `this.options` (not otherwise
 *                        used by this method)
 * @returns {string} generated source code
 */
lrGeneratorMixin.generateAMDModule = function generateAMDModule(opt){
    opt = typal.mix.call({}, this.options, opt);
    var module = this.generateModule_();

    // lexer code is included only when a lexer that can generate itself is set
    var lexerCode = (this.lexer && this.lexer.generateModule) ?
        '\n' + this.lexer.generateModule() + '\nparser.lexer = lexer;' :
        '';

    var out = '\n\ndefine(function(require){\n'
        + module.commonCode
        + '\nvar parser = '+ module.moduleCode
        + "\n"+this.moduleInclude
        + lexerCode
        + '\nreturn parser;'
        + '\n});';
    return out;
};
|
|
|
|
/**
 * Generates the plain module (see generateModule) followed by a guarded
 * block that exports `parser`, `Parser` and a `parse` function when
 * `require` and `exports` exist.
 *
 * @param {Object} [opt]  options mixed over `this.options`; reads `moduleName`
 * @returns {string} generated source code
 */
lrGeneratorMixin.generateCommonJSModule = function generateCommonJSModule (opt) {
    opt = typal.mix.call({}, this.options, opt);
    var moduleName = opt.moduleName || "parser";

    var exportsCode = [
        "\n\n\nif (typeof require !== 'undefined' && typeof exports !== 'undefined') {",
        "\nexports.parser = "+moduleName+";",
        "\nexports.Parser = "+moduleName+".Parser;",
        "\nexports.parse = function () { return "+moduleName+".parse.apply("+moduleName+", arguments); };",
        "\n}"
    ].join("");

    return this.generateModule(opt) + exportsCode;
};
|
|
|
|
/**
 * Generates `var <moduleName> = <module expression>` preceded by a banner
 * comment. A module name containing a dot is assigned to as is, without
 * `var`.
 *
 * @param {Object} [opt]  options mixed over `this.options`; reads `moduleName`
 * @returns {string} generated source code
 */
lrGeneratorMixin.generateModule = function generateModule (opt) {
    opt = typal.mix.call({}, this.options, opt);
    var moduleName = opt.moduleName || "parser";

    // dotted names are property paths and cannot be declared with `var`
    var target = moduleName.match(/\./) ? moduleName : "var "+moduleName;

    return "/* parser generated by jison-fork */\n"
        + target + " = " + this.generateModuleExpr();
};
|
|
|
|
|
|
/**
 * Generates an immediately-invoked function expression that builds the
 * parser object, attaches the lexer when one is available, defines a
 * `Parser` constructor whose prototype is the parser, and evaluates to a new
 * `Parser` instance.
 *
 * @returns {string} generated source code
 */
lrGeneratorMixin.generateModuleExpr = function generateModuleExpr () {
    var out = '';
    var module = this.generateModule_();

    out += "(function(){\n";
    out += module.commonCode;
    out += "\nvar parser = "+module.moduleCode;
    out += "\n"+this.moduleInclude;
    if (this.lexer && this.lexer.generateModule) {
        // Start the lexer code on its own line, as generateAMDModule does, so
        // a moduleInclude ending in a line comment cannot swallow it.
        out += "\n" + this.lexer.generateModule();
        out += "\nparser.lexer = lexer;";
    }
    out += "\nfunction Parser () {\n this.yy = {};\n}\n"
        + "Parser.prototype = parser;"
        + "parser.Parser = Parser;"
        + "\nreturn new Parser;\n})();";

    return out;
};
|
|
|
|
/**
 * Currently a pass-through: returns the given parse function unchanged.
 *
 * @param {*} fn  parse function (or its source)
 * @returns {*} `fn` itself
 */
function addTokenStack (fn) {
    return fn;
}
|
|
|
|
// lex function that supports token stacks
|
|
// Returns the next token: first from the pending token stack, then from the
// lexer, falling back to EOF. A lexer result that is an array becomes the new
// token stack and its last element is returned. Non-numeric tokens are mapped
// through `self.symbols_`, and returned unchanged when they have no entry.
//
// `tstack`, `lexer`, `EOF` and `self` are free variables that must exist in
// the scope this function runs in. The comments are kept outside the function
// in case its source text is embedded elsewhere — TODO confirm.
function tokenStackLex() {
    var token;
    token = tstack.pop() || lexer.lex() || EOF;
    if (typeof token !== 'number') {
        if (token instanceof Array) {
            tstack = token;
            token = tstack.pop();
        }
        token = self.symbols_[token] || token;
    }
    return token;
}
|
|
|
|
// Generates the code of the parser module, which consists of two parts:
|
|
// - module.commonCode: initialization code that should be placed before the module
|
|
// - module.moduleCode: code that creates the module object
|
|
/**
 * Generates the code of the parser module, in two parts.
 *
 * @returns {{commonCode: string, moduleCode: string}}
 *   - commonCode: initialization code that should be placed before the module
 *   - moduleCode: an object literal (terminated by `;`) that creates the
 *     module object
 */
lrGeneratorMixin.generateModule_ = function generateModule_ () {
    var parseFn = String(parser.parse);
    // if (!this.hasErrorRecovery) {
    //     parseFn = removeErrorRecovery(parseFn);
    // }

    // Generate code with fresh variable names
    nextVariableId = 0;
    var tableCode = this.generateTableCode(this.table);

    // Generate the initialization code
    var commonCode = tableCode.commonCode;

    // numeric keys are emitted without quotes
    var unquoteNumericKeys = /"([0-9]+)":/g;

    // Generate the module creation code
    var members = [
        "trace: " + String(this.trace || parser.trace),
        "yy: {}",
        "symbols_: " + JSON.stringify(this.symbols_),
        "terminals_: " + JSON.stringify(this.terminals_).replace(unquoteNumericKeys,"$1:"),
        "productions_: " + JSON.stringify(this.productions_),
        "performAction: " + String(this.performAction),
        "table: " + tableCode.moduleCode,
        "defaultActions: " + JSON.stringify(this.defaultActions).replace(unquoteNumericKeys,"$1:"),
        "parseError: " + String(this.parseError || (this.hasErrorRecovery ? traceParseError : parser.parseError)),
        "parse: " + parseFn
    ];
    var moduleCode = "{" + members.join(",\n") + "};";

    return { commonCode: commonCode, moduleCode: moduleCode };
};
|
|
|
|
// Generate code that represents the specified parser table.
//
// The table is serialized with JSON.stringify and then compressed textually:
//   1. quotes around numeric property names are dropped;
//   2. objects in which one value is shared by several keys become `o(...)` calls;
//   3. number lists that occur more than once are hoisted into variables.
//
// Returns `{ commonCode, moduleCode }`: `commonCode` is a `var` statement that
// declares the `o` helper plus the hoisted lists, `moduleCode` is the table
// expression that refers to them.
lrGeneratorMixin.generateTableCode = function (table) {
    var moduleCode = JSON.stringify(table);
    var variables = [createObjectCode];

    // Don't surround numerical property name numbers in quotes
    moduleCode = moduleCode.replace(/"([0-9]+)"(?=:)/g, "$1");

    // Replace objects with several identical values by function calls
    // e.g., { 1: [6, 7], 3: [6, 7], 4: [6, 7], 5: 8 } = o([1, 3, 4], [6, 7], { 5: 8 })
    moduleCode = moduleCode.replace(/\{\d+:[^\}]+,\d+:[^\}]+\}/g, function (object) {
        var keysByValue = {};   // value text -> keys carrying that value
        var frequentValue;
        var maxKeyCount = 0;
        var entryMatcher = /(\d+):([^:]+)(?=,\d+:|\})/g;
        var entry, key, value, keyCount;

        // Find the value that occurs with the highest number of keys
        while ((entry = entryMatcher.exec(object))) {
            key = entry[1];
            value = entry[2];
            keyCount = 1;

            // For each value, store the keys where that value occurs
            if (value in keysByValue) {
                keyCount = keysByValue[value].push(key);
            } else {
                keysByValue[value] = [key];
            }

            // Remember this value if it is the most frequent one
            if (keyCount > maxKeyCount) {
                maxKeyCount = keyCount;
                frequentValue = value;
            }
        }

        // Leave the object alone unless some value occurs multiple times
        if (maxKeyCount <= 1) {
            return object;
        }

        // Collect all non-frequent values into a remainder object
        var remainderEntries = [];
        for (value in keysByValue) {
            if (value !== frequentValue) {
                keysByValue[value].forEach(function (k) {
                    remainderEntries.push(k + ':' + value);
                });
            }
        }
        var remainder = remainderEntries.length ? ',{' + remainderEntries.join(',') + '}' : '';

        // Create the function call `o(keys, value, remainder)`
        return 'o([' + keysByValue[frequentValue].join(',') + '],' + frequentValue + remainder + ')';
    });

    // Count occurrences of number lists
    var listMatcher = /\[[0-9,]+\]/g;
    var lists = {};
    (moduleCode.match(listMatcher) || []).forEach(function (list) {
        lists[list] = (lists[list] || 0) + 1;
    });

    // Replace frequently occurring number lists with variables
    moduleCode = moduleCode.replace(listMatcher, function (list) {
        var listId = lists[list];
        // While the entry is still a number it is the occurrence count; on first
        // visit it is overwritten with the replacement text for this list.
        if (typeof listId === 'number') {
            if (listId === 1) {
                // If the list does not occur frequently, represent it by the list
                lists[list] = listId = list;
            } else {
                // If the list occurs frequently, represent it by a newly assigned variable
                lists[list] = listId = createVariable();
                variables.push(listId + '=' + list);
            }
        }
        return listId;
    });

    // Return the variable initialization code and the table code
    return {
        commonCode: 'var ' + variables.join(',') + ';',
        moduleCode: moduleCode
    };
};
|
|
// Source text of the `o` helper emitted into generated modules: a function that
// extends an object with the given value for all given keys
// e.g., o([1, 3, 4], [6, 7], { x: 1, y: 2 }) = { 1: [6, 7], 3: [6, 7], 4: [6, 7], x: 1, y: 2 }
// The third argument is optional; a new object is created when it is omitted.
var createObjectCode = [
    'o=function(k,v,o,l){',
    'for(o=o||{},l=k.length;l--;o[k[l]]=v);',
    'return o}'
].join('');
|
|
|
|
// Counter behind createVariable(); generateModule_ resets it to 0 before
// generating table code so every module starts from the same names.
var nextVariableId = 0;
// Alphabet used to spell the counter (one character per base-64 digit).
var variableTokens = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_$';
var variableTokensLength = variableTokens.length;

// Creates a variable with a unique name: "$V" followed by the current counter
// value written least-significant digit first in the alphabet above
// (0 -> "$V0", 10 -> "$Va", 64 -> "$V01"). Increments the counter.
function createVariable() {
    var id = nextVariableId++;
    var name = '$V';

    do {
        name += variableTokens[id % variableTokensLength];
        id = Math.floor(id / variableTokensLength);
    } while (id !== 0);

    return name;
}
|
|
|
|
// debug mixin for LR parser generators
|
|
|
|
/**
 * Describes a parse-table action in words (used when reporting conflicts).
 *
 * @param {Array} a    action tuple; a[0] is the action code, a[1] its operand
 * @param {Object} gen generator whose `productions` list resolves rule numbers
 * @returns {string} description for shift (1) and reduce (2); any other code
 *     is reported as 'accept'
 */
function printAction (a, gen) {
    switch (a[0]) {
    case 1:
        return 'shift token (then go to state ' + a[1] + ')';
    case 2:
        return 'reduce by rule: ' + gen.productions[a[1]];
    default:
        return 'accept';
    }
}
|
|
|
|
// Hook methods mixed into a generator when debugging is enabled. Each hook
// reports progress through the host's `trace` / `warn` methods.
var lrGeneratorDebug = {
    beforeparseTable: function () {
        this.trace("Building parse table.");
    },

    // Reports every conflict that was resolved by default, then the total.
    afterparseTable: function () {
        var self = this;
        if (this.conflicts > 0) {
            this.resolutions.forEach(function (r) {
                // r = [state, token, resolution]; resolution.r / resolution.s are
                // the two competing actions handed to printAction.
                if (r[2].bydefault) {
                    self.warn('Conflict at state: ',r[0], ', token: ',r[1], "\n ", printAction(r[2].r, self), "\n ", printAction(r[2].s, self));
                }
            });
            this.trace("\n"+this.conflicts+" Conflict(s) found in grammar.");
        }
        this.trace("Done.");
    },

    // Dumps every item set together with its outgoing transitions.
    aftercanonicalCollection: function (states) {
        // Call trace as a method of the host: detaching it into a local would
        // drop `this` for trace implementations that rely on it.
        var self = this;
        self.trace("\nItem sets\n------");

        states.forEach(function (state, i) {
            self.trace("\nitem set",i,"\n"+state.join("\n"), '\ntransitions -> ', JSON.stringify(state.edges));
        });
    }
};
|
|
|
|
// Base object for the runtime parser: trace/warn/error, parseError, parse and
// init are attached to it below, and generateModule_ serializes its members.
var parser = typal.beget();
|
|
|
|
// Builds a ready-to-use parser object from this generator.
//
// The module expression produced by generateModuleExpr() is evaluated, the
// generator's productions are attached for debugging, and the generate*
// methods are exposed on the parser for backwards compatibility.
lrGeneratorMixin.createParser = function createParser () {
    // NOTE(review): eval runs generator-produced source, which embeds the
    // grammar's action code. Only feed this grammars you trust.
    var p = eval(this.generateModuleExpr());

    // for debugging
    p.productions = this.productions;

    var self = this;

    // Wraps a generator method so it runs against the generator, after copying
    // over whatever lexer is installed on the parser at call time.
    function bind(method) {
        return function() {
            self.lexer = p.lexer;
            return self[method].apply(self, arguments);
        };
    }

    // backwards compatibility
    ['generate', 'generateAMDModule', 'generateModule', 'generateCommonJSModule'].forEach(function (method) {
        p[method] = bind(method);
    });

    return p;
};
|
|
|
|
// The runtime parser reuses the generator's logging functions.
parser.trace = generator.trace;
parser.warn = generator.warn;
parser.error = generator.error;
|
|
|
|
/**
 * parseError handler that only reports: the message goes to `this.trace` and
 * nothing is thrown. generateModule_ emits it for grammars with error recovery.
 *
 * @param {string} err  error message
 * @param {Object} hash error details (unused here; part of the handler signature)
 */
function traceParseError (err, hash) {
    this.trace(err);
}
|
|
|
|
/**
 * Default parseError handler.
 *
 * @param {string} str    error message
 * @param {Object} [hash] error details; only `hash.recoverable` is read
 * @throws {Error} carrying `str` unless the error is marked recoverable, in
 *     which case the message is passed to `this.trace` instead
 */
function parseError (str, hash) {
    // A missing hash is treated as "not recoverable" so the caller gets the
    // parse error itself rather than a TypeError from the property access.
    if (hash && hash.recoverable) {
        this.trace(str);
        return;
    }
    throw new Error(str);
}
|
|
|
|
// Install parseError as the default handler on both the runtime parser and the generator mixin.
parser.parseError = lrGeneratorMixin.parseError = parseError;
|
|
|
|
/**
 * Table-driven LR parse loop.
 *
 * NOTE: generateModule_ emits this function as source text (String(parser.parse)),
 * so it must stay self-contained: it may only use its own locals and members of
 * `this`.
 *
 * @param {*} input       handed to `lexer.setInput(input, yy)`
 * @param {Object} [script] optional diagnostics sink; when given, a syntax error
 *     is recorded through `script.addDiagnostic(...)` and the returned
 *     diagnostic's `raise()` is called
 * @returns {*} the first non-undefined value returned by `performAction`, or
 *     `true` when the accept action is reached
 * @throws {Error} on a syntax error for which no error recovery rule exists
 */
parser.parse = function parse (input, script = null) {

    // For Imba we are going to drop most of the features that are not used
    // Locations are provided by the tokens from the lexer directly - so drop yylloc
    // We dont really need the shared state (it seems)

    var self = this,
        stack = [0],        // state 0, then alternating symbol / state entries
        tstack = [],        // token stack (not read in this function)
        vstack = [null],    // semantic value stack
        table = this.table,
        yytext = '',
        yylineno = 0,       // never updated in this function
        yyleng = 0,         // never updated in this function
        recovering = 0,     // set to 3 on error, counted down on each normal shift
        TERROR = 2,
        EOF = 1;

    var __sym = this.symbols_;
    var __prod = this.productions_;

    // var args = lstack.slice.call(arguments, 1);
    //this.reductionCount = this.shiftCount = 0;

    // Lex through an object that inherits from this.lexer.
    var lexer = Object.create(this.lexer);
    var yy = this.yy;

    lexer.setInput(input,yy);

    // A handler installed on yy wins; otherwise fall back to the one inherited
    // from the prototype (this also undoes an override left by an earlier call).
    if (typeof yy.parseError === 'function') {
        this.parseError = yy.parseError;
    } else {
        this.parseError = Object.getPrototypeOf(this).parseError;
    }

    // Drops the n topmost grammar symbols: two parse-stack entries (symbol and
    // state) and one semantic value each.
    function popStack (n) {
        stack.length = stack.length - 2 * n;
        vstack.length = vstack.length - n;
    }

    var symbol, preErrorSymbol, state, action, a, r, yyval = {}, p, len, newState, expected;

    // Reports the syntax error at the current lookahead and, when the grammar
    // has a reachable `error` rule, rewinds the stack so parsing can resume with
    // the error token as lookahead. Updates symbol / state / action / recovering.
    function handleError(){
        var error_rule_depth;
        var errStr = '';

        // Return the rule stack depth where the nearest error rule can be found.
        // Return FALSE when no error recovery rule was found.
        // we have no rules now
        function locateNearestErrorRecoveryRule(state) {
            var stack_probe = stack.length - 1;
            var depth = 0;

            // try to recover from error
            for(;;) {
                // check for error recovery rule in this state
                if ((TERROR.toString()) in table[state]) {
                    return depth;
                }
                if (state === 0 || stack_probe < 2) {
                    return false; // No suitable error recovery rule available.
                }
                stack_probe -= 2; // popStack(1): [symbol, action]
                state = stack[stack_probe];
                ++depth;
            }
        }

        if (!recovering) {
            // first see if there's any chance at hitting an error recovery rule:
            error_rule_depth = locateNearestErrorRecoveryRule(state);

            // Report error
            expected = [];

            var tsym = lexer.yytext;
            var tok = self.terminals_[symbol] || symbol;

            // Find closest non-generated token: walk back from the offending
            // token while its _loc is -1. A lexer without a token list simply
            // yields no location instead of masking the parse error.
            var tokens = lexer.tokens || [];
            var tidx = tokens.indexOf(tsym);
            var ttok = tsym;
            while(ttok && ttok._loc == -1){
                ttok = tokens[--tidx];
            }

            // -1 means "no usable location"
            var tloc = (ttok && ttok._loc != null) ? ttok._loc : -1;
            var tend = tloc > -1 ? (tloc + (ttok._len || 0)) : -1;
            var tpos = tloc > -1 ? "[" + tloc + ":" + (ttok._len || 0) + "]" : '[0:0]';

            if (lexer.showPosition) {
                errStr = 'Parse error at '+(tpos)+":\n"+lexer.showPosition()+"\nExpecting "+expected.join(', ') + ", got '" + (tok)+ "'";
            } else {
                // errStr = 'Parse error at '+(tpos)+": Unexpected " + (symbol == EOF ? "end of input" : ("'"+(tok)+"'"));
                errStr = "Unexpected " + (symbol == EOF ? "end of input" : ("'"+(tok)+"'"));
            }

            if(script){
                var err = script.addDiagnostic('error',{
                    message: errStr,
                    source: 'imba-parser',
                    range: script.rangeAt(tloc,tend)
                });

                err.raise();
            }

            self.parseError(errStr, {
                lexer: lexer,
                text: lexer.match,
                token: tok,
                offset: tloc,
                length: (tend - tloc),
                start: {offset: tloc},
                end: {offset: tend},
                line: lexer.yylineno,
                expected: expected,
                recoverable: (error_rule_depth !== false)
            });

        } else if (preErrorSymbol !== EOF) {
            error_rule_depth = locateNearestErrorRecoveryRule(state);
        }

        // just recovered from another error
        if (recovering == 3) {
            if (symbol === EOF || preErrorSymbol === EOF) {
                throw new Error(errStr || 'Parsing halted while starting to recover from another error.');
            }

            // discard current lookahead and grab another; without fetching a new
            // token the same offending lookahead would be retried forever
            yytext = lexer.yytext;
            symbol = __sym[lexer.lex()] || EOF;
        }

        // try to recover from error
        if (error_rule_depth === false) {
            throw new Error(errStr || 'Parsing halted. No suitable error recovery rule available.');
        }
        popStack(error_rule_depth);
        preErrorSymbol = (symbol == TERROR ? null : symbol); // save the lookahead token
        symbol = TERROR;         // insert generic error symbol as new lookahead
        state = stack[stack.length-1];
        action = table[state] && table[state][TERROR];
        recovering = 3; // allow 3 real symbols to be shifted before reporting a new error
    }

    while (true) {
        // retrieve state number from top of stack
        state = stack[stack.length - 1];

        // fetch a lookahead unless one is pending
        if (symbol === null || typeof symbol == 'undefined') {
            symbol = __sym[lexer.lex()] || EOF;
        }
        action = table[state] && table[state][symbol];

        // NOTE(review): the label is not referenced in this function; presumably
        // a marker for the disabled removeErrorRecovery step - confirm before removing.
        _handle_error:
        if (typeof action === 'undefined' || !action.length || !action[0]) {
            handleError();
        }

        switch (action[0]) {
            case 1: // shift
                stack.push(symbol);
                stack.push(action[1]); // push state
                vstack.push(lexer.yytext);

                symbol = null;
                if (!preErrorSymbol) { // normal execution/no error
                    yytext = lexer.yytext;
                    if (recovering > 0) {
                        recovering--;
                    }
                } else {
                    // error just occurred, resume old lookahead f/ before error
                    symbol = preErrorSymbol;
                    preErrorSymbol = null;
                }
                break;

            case 2: // reduce
                // productions_ entries are [lhs symbol, handle length]
                len = __prod[action[1]][1];

                // perform semantic action; $$ defaults to the first handle value
                yyval.$ = vstack[vstack.length-len];
                r = this.performAction(yyval, yytext, yy, action[1], vstack);
                if (typeof r !== 'undefined') {
                    return r;
                }

                // pop the handle
                while(len > 0) {
                    stack.pop();
                    stack.pop();
                    vstack.pop();
                    len--;
                }

                // push the reduced symbol and the goto state
                stack.push(__prod[action[1]][0]);
                newState = table[stack[stack.length-2]][stack[stack.length-1]];
                stack.push(newState);
                vstack.push(yyval.$);
                break;

            case 3: // accept
                return true;
        }
    }
};
|
|
|
|
// Copies the generated tables and the action function from `dict` onto this
// parser. Only the six members listed below are read.
parser.init = function parser_init (dict) {
    var self = this;
    [
        'table',
        'defaultActions',
        'performAction',
        'productions_',
        'symbols_',
        'terminals_'
    ].forEach(function (member) {
        self[member] = dict[member];
    });
};
|
|
|
|
/*
 * LR(0) Parser
 * */

var lr0 = generator.beget(lookaheadMixin, lrGeneratorMixin, {
    type: "LR(0)",
    // The parse table is built as soon as the generator has been constructed.
    afterconstructor: function lr0_afterconstructor () {
        this.buildTable();
    }
});

var LR0Generator = exports.LR0Generator = lr0.construct();
|
|
|
|
/*
 * Simple LALR(1)
 * */

var lalr = generator.beget(lookaheadMixin, lrGeneratorMixin, {
    type: "LALR(1)",

    // Builds the item sets, derives the lookahead grammar (newg) from them,
    // computes its lookaheads, folds those back into the reduction items and
    // finally builds the parse table and default actions.
    afterconstructor: function (grammar, options) {
        if (this.DEBUG) this.mix(lrGeneratorDebug, lalrGeneratorDebug); // mixin debug methods

        var opts = options || {};
        this.states = this.canonicalCollection();
        this.terms_ = {};       // new-grammar symbol ("state:symbol") -> original symbol

        var newg = this.newg = typal.beget(lookaheadMixin,{
            oldg: this,
            trace: this.trace,
            nterms_: {},        // new-grammar symbol -> state number it starts in
            DEBUG: false,
            // Maps a transition in the new grammar back onto the original item
            // sets by stripping the "state:" prefixes.
            go_: function (r, B) {
                var stateNo = r.split(":")[0]; // grab state #
                var symbols = B.map(function (b) { return b.slice(b.indexOf(":")+1); });
                return this.oldg.go(stateNo, symbols);
            }
        });
        newg.nonterminals = {};
        newg.productions = [];

        this.inadequateStates = [];

        // if true, only lookaheads in inadequate states are computed (faster, larger table)
        // if false, lookaheads for all reductions will be computed (slower, smaller table)
        this.onDemandLookahead = opts.onDemandLookahead || false;

        this.buildNewGrammar();
        newg.computeLookaheads();
        this.unionLookaheads();

        this.table = this.parseTable(this.states);
        this.defaultActions = findDefaults(this.table);
    },

    // Lookahead set for a reduction item: with on-demand lookaheads, items in
    // adequate states reduce on every terminal; otherwise the item's own follows.
    lookAheads: function LALR_lookaheads (state, item) {
        return (!!this.onDemandLookahead && !state.inadequate) ? this.terminals : item.follows;
    },

    // Follows the symbols of `w` from state `p` and returns the state reached.
    // A symbol without an edge leaves the current state unchanged.
    go: function LALR_go (p, w) {
        var q = parseInt(p, 10);
        for (var i = 0; i < w.length; i++) {
            q = this.states.item(q).edges[w[i]] || q;
        }
        return q;
    },

    // Like go(), but also records the "state:symbol" name of every step.
    // Returns { path: [names], endState: number }.
    goPath: function LALR_goPath (p, w) {
        var q = parseInt(p, 10),
            path = [],
            t;
        for (var i = 0; i < w.length; i++) {
            t = w[i] ? q+":"+w[i] : '';
            if (t) this.newg.nterms_[t] = q;
            path.push(t);
            q = this.states.item(q).edges[w[i]] || q;
            this.terms_[t] = w[i];
        }
        return {path: path, endState: q};
    },

    // every disjoint reduction of a nonterminal becomes a production in G'
    buildNewGrammar: function LALR_buildNewGrammar () {
        var self = this,
            newg = this.newg;

        this.states.forEach(function (state, i) {
            state.forEach(function (item) {
                // only items with the dot at the start introduce a production
                if (item.dotPosition !== 0) {
                    return;
                }

                // new symbols are a combination of state and transition symbol
                var symbol = i+":"+item.production.symbol;
                self.terms_[symbol] = item.production.symbol;
                newg.nterms_[symbol] = i;
                if (!newg.nonterminals[symbol]) {
                    newg.nonterminals[symbol] = new Nonterminal(symbol);
                }

                var pathInfo = self.goPath(i, item.production.handle);
                var p = new Production(symbol, pathInfo.path, newg.productions.length);
                newg.productions.push(p);
                newg.nonterminals[symbol].productions.push(p);

                // store the transition that gets 'backed up to' after reduction on path
                var handle = item.production.handle.join(' ');
                var goes = self.states.item(pathInfo.endState).goes;
                if (!goes[handle]) {
                    goes[handle] = [];
                }
                goes[handle].push(symbol);

                //self.trace('new production:',p);
            });

            if (state.inadequate) {
                self.inadequateStates.push(i);
            }
        });
    },

    // Extends the follows of every reduction item with the (translated) follows
    // of the new-grammar nonterminals that reduce through it.
    unionLookaheads: function LALR_unionLookaheads () {
        var self = this,
            newg = this.newg,
            // inadequateStates holds state numbers, this.states holds the states themselves
            states = !!this.onDemandLookahead ? this.inadequateStates : this.states;

        states.forEach(function union_states_forEach (i) {
            var state = typeof i === 'number' ? self.states.item(i) : i;

            if (!state.reductions.length) {
                return;
            }

            state.reductions.forEach(function union_reduction_forEach (item) {
                // Terminals already present in item.follows. No prototype, so a
                // terminal named like an Object.prototype member is not skipped.
                var seen = Object.create(null);
                for (var k = 0; k < item.follows.length; k++) {
                    seen[item.follows[k]] = true;
                }

                state.goes[item.production.handle.join(' ')].forEach(function reduction_goes_forEach (symbol) {
                    newg.nonterminals[symbol].follows.forEach(function goes_follows_forEach (follow) {
                        var terminal = self.terms_[follow];
                        if (!seen[terminal]) {
                            seen[terminal] = true;
                            item.follows.push(terminal);
                        }
                    });
                });
                //self.trace('unioned item', item);
            });
        });
    }
});

var LALRGenerator = exports.LALRGenerator = lalr.construct();
|
|
|
|
// LALR generator debug mixin: routes trace output to Jison.print and announces
// the two lookahead phases.
var lalrGeneratorDebug = {
    trace: function trace () {
        Jison.print.apply(null, arguments);
    },

    beforebuildNewGrammar: function () {
        this.trace(this.states.size()+" states.");
        this.trace("Building lookahead grammar.");
    },

    beforeunionLookaheads: function () {
        this.trace("Computing lookaheads.");
    }
};
|
|
|
|
/*
 * Lookahead parser definitions
 *
 * Define base type
 * */
var lrLookaheadGenerator = generator.beget(lookaheadMixin, lrGeneratorMixin, {
    // Lookaheads must exist before the table is built.
    afterconstructor: function lr_afterconstructor () {
        this.computeLookaheads();
        this.buildTable();
    }
});
|
|
|
|
/*
 * SLR Parser
 * */
var SLRGenerator = exports.SLRGenerator = lrLookaheadGenerator.construct({
    type: "SLR(1)",

    // A reduction uses the follow set of the production's left-hand nonterminal.
    lookAheads: function SLR_lookAhead (state, item) {
        return this.nonterminals[item.production.symbol].follows;
    }
});
|
|
|
|
|
|
/*
 * LR(1) Parser
 * */
var lr1 = lrLookaheadGenerator.beget({
    type: "Canonical LR(1)",

    // Each LR(1) item carries its own lookahead set.
    lookAheads: function LR_lookAheads (state, item) {
        return item.follows;
    },

    // Items are identified by production, dot position and lookaheads.
    Item: lrGeneratorMixin.Item.prototype.construct({
        afterconstructor: function () {
            // sort() orders this.follows in place, so the id does not depend on
            // the order in which the lookaheads were collected
            this.id = this.production.id+'a'+this.dotPosition+'a'+this.follows.sort().join(',');
        },
        eq: function (e) {
            return e.id === this.id;
        }
    }),

    // Computes the closure of `itemSet` breadth-first: every round expands the
    // items added in the previous round until no new item shows up.
    // NOTE: `Set` is the project's util/set type required at the top of the
    // file, not the built-in ES Set.
    closureOperation: function LR_ClosureOperation (itemSet /*, closureSet*/) {
        var closureSet = new this.ItemSet();
        var self = this;

        var set = itemSet,
            itemQueue;

        do {
            itemQueue = new Set();
            closureSet.concat(set);
            set.forEach(function (item) {
                var symbol = item.markedSymbol;
                var b, r;

                // if token is a nonterminal, recursively add closures
                if (symbol && self.nonterminals[symbol]) {
                    r = item.remainingHandle();
                    b = self.first(item.remainingHandle());
                    // when the rest of the handle can vanish, the item's own
                    // lookaheads carry over to the new items
                    if (b.length === 0 || item.production.nullable || self.nullable(r)) {
                        b = b.concat(item.follows);
                    }
                    self.nonterminals[symbol].productions.forEach(function (production) {
                        var newItem = new self.Item(production, 0, b);
                        if(!closureSet.contains(newItem) && !itemQueue.contains(newItem)) {
                            itemQueue.push(newItem);
                        }
                    });
                } else if (!symbol) {
                    // reduction
                    closureSet.reductions.push(item);
                }
            });

            set = itemQueue;
        } while (!itemQueue.isEmpty());

        return closureSet;
    }
});

var LR1Generator = exports.LR1Generator = lr1.construct();
|
|
|
|
/*
 * LL Parser
 * */
var ll = generator.beget(lookaheadMixin, {
    type: "LL(1)",

    afterconstructor: function ll_afterconstructor () {
        this.computeLookaheads();
        this.table = this.parseTable(this.productions);
    },

    // Builds the LL(1) table: table[nonterminal][token] is the list of indices
    // of the productions predicted on that token. A cell with more than one
    // entry is a conflict; every extra entry increments this.conflicts.
    parseTable: function llParseTable (productions) {
        var table = {},
            self = this;
        var hasOwn = Object.prototype.hasOwnProperty;

        productions.forEach(function (production, i) {
            var row = table[production.symbol] || {};
            var tokens = production.first;
            if (self.nullable(production.handle)) {
                // NOTE(review): the result is discarded, so this relies on
                // Set.union extending `tokens` in place - confirm in util/set.
                Set.union(tokens, self.nonterminals[production.symbol].follows);
            }
            tokens.forEach(function (token) {
                // own-property check: a token named like an Object.prototype
                // member must not look like an existing cell
                if (hasOwn.call(row, token)) {
                    row[token].push(i);
                    self.conflicts++;
                } else {
                    row[token] = [i];
                }
            });
            table[production.symbol] = row;
        });

        return table;
    }
});

var LLGenerator = exports.LLGenerator = ll.construct();
|
|
|
|
// Creates the generator selected by the `type` option. Options passed in
// override the grammar's own `options`; any type other than 'lr0', 'slr', 'lr'
// or 'll' (including none) yields an LALR(1) generator.
Jison.Generator = function Jison_Generator (g, options) {
    var opt = typal.mix.call({}, g.options, options);

    switch (opt.type) {
        case 'lr0':
            return new LR0Generator(g, opt);
        case 'slr':
            return new SLRGenerator(g, opt);
        case 'lr':
            return new LR1Generator(g, opt);
        case 'll':
            return new LLGenerator(g, opt);
        default:
            return new LALRGenerator(g, opt);
    }
};
|
|
|
|
return function Parser (g, options) {
|
|
var gen = Jison.Generator(g, options);
|
|
return gen.createParser();
|
|
};
|
|
|
|
})();
|
|
|