// Jison, an LR(0), SLR(1), LARL(1), LR(1) Parser Generator // Zachary Carter // MIT X Licensed var typal = require('./util/typal').typal; var Set = require('./util/set').Set; var Jison = exports.Jison = exports; // detect print if (typeof console !== 'undefined' && console.log) { Jison.print = console.log; } else if (typeof puts !== 'undefined') { Jison.print = function print () { puts([].join.call(arguments, ' ')); }; } else if (typeof print !== 'undefined') { Jison.print = print; } else { Jison.print = function print () {}; } Jison.Parser = (function () { // iterator utility function each (obj, func) { if (obj.forEach) { obj.forEach(func); } else { var p; for (p in obj) { if (obj.hasOwnProperty(p)) { func.call(obj, obj[p], p, obj); } } } } var Nonterminal = typal.construct({ constructor: function Nonterminal (symbol) { this.symbol = symbol; this.productions = new Set(); this.first = []; this.follows = []; this.nullable = false; }, toString: function Nonterminal_toString () { var str = this.symbol+"\n"; str += (this.nullable ? 
'nullable' : 'not nullable'); str += "\nFirsts: "+this.first.join(', '); str += "\nFollows: "+this.first.join(', '); str += "\nProductions:\n "+this.productions.join('\n '); return str; } }); var Production = typal.construct({ constructor: function Production (symbol, handle, id) { this.symbol = symbol; this.handle = handle; this.nullable = false; this.id = id; this.first = []; this.precedence = 0; }, toString: function Production_toString () { return this.symbol+" -> "+this.handle.join(' '); } }); var generator = typal.beget(); generator.constructor = function Jison_Generator (grammar, opt) { var options = typal.mix.call({}, grammar.options, opt); this.terms = {}; this.operators = {}; this.productions = []; this.conflicts = 0; this.resolutions = []; this.options = options; this.parseParams = grammar.parseParams; this.yy = {}; // accessed as yy free variable in the parser/lexer actions // source included in semantic action execution scope if (grammar.actionInclude) { if (typeof grammar.actionInclude === 'function') { grammar.actionInclude = String(grammar.actionInclude).replace(/^\s*function \(\) \{/, '').replace(/\}\s*$/, ''); } this.actionInclude = grammar.actionInclude; } this.moduleInclude = grammar.moduleInclude || ''; this.DEBUG = options.debug || false; if (this.DEBUG) this.mix(generatorDebug); // mixin debug methods this.processGrammar(grammar); }; generator.processGrammar = function processGrammarDef (grammar) { var bnf = grammar.bnf, tokens = grammar.tokens, nonterminals = this.nonterminals = {}, productions = this.productions, self = this; if (tokens) { if (typeof tokens === 'string') { tokens = tokens.trim().split(' '); } else { tokens = tokens.slice(0); } } var symbols = this.symbols = []; // calculate precedence of operators var operators = this.operators = processOperators(grammar.operators); // build productions from cfg this.buildProductions(bnf, productions, nonterminals, symbols, operators); if (tokens && this.terminals.length !== tokens.length) 
{ self.trace("Warning: declared tokens differ from tokens found in rules."); self.trace(this.terminals); self.trace(tokens); } // augment the grammar this.augmentGrammar(grammar); }; generator.augmentGrammar = function augmentGrammar (grammar) { if (this.productions.length === 0) { throw new Error("Grammar error: must have at least one rule."); } // use specified start symbol, or default to first user defined production this.startSymbol = grammar.start || grammar.startSymbol || this.productions[0].symbol; if (!this.nonterminals[this.startSymbol]) { throw new Error("Grammar error: startSymbol must be a non-terminal found in your grammar."); } this.EOF = "$end"; // augment the grammar var acceptProduction = new Production('$accept', [this.startSymbol, '$end'], 0); this.productions.unshift(acceptProduction); // prepend parser tokens this.symbols.unshift("$accept",this.EOF); this.symbols_.$accept = 0; this.symbols_[this.EOF] = 1; this.terminals.unshift(this.EOF); this.nonterminals.$accept = new Nonterminal("$accept"); this.nonterminals.$accept.productions.push(acceptProduction); // add follow $ to start symbol this.nonterminals[this.startSymbol].follows.push(this.EOF); }; // set precedence and associativity of operators function processOperators (ops) { if (!ops) return {}; var operators = {}; for (var i=0,k,prec;prec=ops[i]; i++) { for (k=1;k < prec.length;k++) { operators[prec[k]] = {precedence: i+1, assoc: prec[0]}; } } return operators; } generator.buildProductions = function buildProductions(bnf, productions, nonterminals, symbols, operators) { // Because of the switch limits in v8 this should probably be split into several methods for the different ranges var actions = [ '/* self == yyval */', this.actionInclude || '', 'var $0 = $$.length - 1;', 'switch (yystate) {' ]; var actionGroups = {}; var prods, symbol; var productions_ = [0]; var symbolId = 1; var symbols_ = {}; var her = false; // has error recovery function addSymbol (s) { if (s && !symbols_[s]) { 
symbols_[s] = ++symbolId; symbols.push(s); } } // add error symbol; will be third symbol, or "2" ($accept, $end, error) addSymbol("error"); for (symbol in bnf) { if (!bnf.hasOwnProperty(symbol)) continue; addSymbol(symbol); nonterminals[symbol] = new Nonterminal(symbol); if (typeof bnf[symbol] === 'string') { prods = bnf[symbol].split(/\s*\|\s*/g); } else { prods = bnf[symbol].slice(0); } prods.forEach(buildProduction); } for (var action in actionGroups) actions.push(actionGroups[action].join(' '), action, 'break;'); var sym, terms = [], terms_ = {}; each(symbols_, function (id, sym) { if (!nonterminals[sym]) { terms.push(sym); terms_[id] = sym; } }); this.hasErrorRecovery = her; this.terminals = terms; this.terminals_ = terms_; this.symbols_ = symbols_; this.productions_ = productions_; actions.push('}'); actions = actions.join("\n") .replace(/YYABORT/g, 'return false') .replace(/YYACCEPT/g, 'return true'); var yyvalParam = "this"; var parameters = "self, yytext, yy, yystate /* action[1] */, $$ /* vstack */"; if (this.parseParams) parameters += ', ' + this.parseParams.join(', '); this.performAction = "function performAction(" + parameters + ") {\n" + actions + "\n}"; function buildProduction (handle) { var r, rhs, i; if (handle.constructor === Array) { rhs = (typeof handle[0] === 'string') ? handle[0].trim().split(' ') : handle[0].slice(0); for (i=0; i don't care about aliases; strip them. rhs = rhs.map(function(e,i) { return e.replace(/\[[a-zA-Z_][a-zA-Z0-9_-]*\]/g, '') }); // only precedence specified r = new Production(symbol, rhs, productions.length+1); if (operators[handle[1].prec]) { r.precedence = operators[handle[1].prec].precedence; } } } else { // no action -> don't care about aliases; strip them. 
handle = handle.replace(/\[[a-zA-Z_][a-zA-Z0-9_-]*\]/g, ''); rhs = handle.trim().split(' '); for (i=0; i=0; i--) { if (!(r.handle[i] in nonterminals) && r.handle[i] in operators) { r.precedence = operators[r.handle[i]].precedence; } } } productions.push(r); productions_.push([symbols_[r.symbol], r.handle[0] === '' ? 0 : r.handle.length]); nonterminals[symbol].productions.push(r); } }; generator.createParser = function createParser () { throw new Error('Calling abstract method.'); }; // noop. implemented in debug mixin generator.trace = function trace () { }; generator.warn = function warn () { var args = Array.prototype.slice.call(arguments,0); Jison.print.call(null,args.join("")); }; generator.error = function error (msg) { throw new Error(msg); }; // Generator debug mixin var generatorDebug = { trace: function trace () { Jison.print.apply(null, arguments); }, beforeprocessGrammar: function () { this.trace("Processing grammar."); }, afteraugmentGrammar: function () { var trace = this.trace; each(this.symbols, function (sym, i) { trace(sym+"("+i+")"); }); } }; /* * Mixin for common behaviors of lookahead parsers * */ var lookaheadMixin = {}; lookaheadMixin.computeLookaheads = function computeLookaheads () { if (this.DEBUG) this.mix(lookaheadDebug); // mixin debug methods this.computeLookaheads = function () {}; this.nullableSets(); this.firstSets(); this.followSets(); }; // calculate follow sets typald on first and nullable lookaheadMixin.followSets = function followSets () { var productions = this.productions, nonterminals = this.nonterminals, self = this, cont = true; // loop until no further changes have been made while(cont) { cont = false; productions.forEach(function Follow_prod_forEach (production, k) { //self.trace(production.symbol,nonterminals[production.symbol].follows); // q is used in Simple LALR algorithm determine follows in context var q; var ctx = !!self.go_; var set = [],oldcount; for (var i=0,t;t=production.handle[i];++i) { if (!nonterminals[t]) 
continue; // for Simple LALR algorithm, self.go_ checks if if (ctx) q = self.go_(production.symbol, production.handle.slice(0, i)); var bool = !ctx || q === parseInt(self.nterms_[t], 10); if (i === production.handle.length+1 && bool) { set = nonterminals[production.symbol].follows; } else { var part = production.handle.slice(i+1); set = self.first(part); if (self.nullable(part) && bool) { set.push.apply(set, nonterminals[production.symbol].follows); } } oldcount = nonterminals[t].follows.length; Set.union(nonterminals[t].follows, set); if (oldcount !== nonterminals[t].follows.length) { cont = true; } } }); } }; // return the FIRST set of a symbol or series of symbols lookaheadMixin.first = function first (symbol) { // epsilon if (symbol === '') { return []; // RHS } else if (symbol instanceof Array) { var firsts = []; for (var i=0,t;t=symbol[i];++i) { if (!this.nonterminals[t]) { if (firsts.indexOf(t) === -1) firsts.push(t); } else { Set.union(firsts, this.nonterminals[t].first); } if (!this.nullable(t)) break; } return firsts; // terminal } else if (!this.nonterminals[symbol]) { return [symbol]; // nonterminal } else { return this.nonterminals[symbol].first; } }; // fixed-point calculation of FIRST sets lookaheadMixin.firstSets = function firstSets () { var productions = this.productions, nonterminals = this.nonterminals, self = this, cont = true, symbol,firsts; // loop until no further changes have been made while(cont) { cont = false; productions.forEach(function FirstSets_forEach (production, k) { var firsts = self.first(production.handle); if (firsts.length !== production.first.length) { production.first = firsts; cont=true; } }); for (symbol in nonterminals) { firsts = []; nonterminals[symbol].productions.forEach(function (production) { Set.union(firsts, production.first); }); if (firsts.length !== nonterminals[symbol].first.length) { nonterminals[symbol].first = firsts; cont=true; } } } }; // fixed-point calculation of NULLABLE lookaheadMixin.nullableSets = 
function nullableSets () {
    // Marks productions and nonterminals as nullable, repeating both passes
    // until an iteration changes nothing.
    var firsts = this.firsts = {},
        nonterminals = this.nonterminals,
        self = this,
        cont = true;

    // loop until no further changes have been made
    while(cont) {
        cont = false;

        // check if each production is nullable
        this.productions.forEach(function (production, k) {
            if (!production.nullable) {
                // n counts nullable symbols; i ends as the number of symbols visited
                for (var i=0,n=0,t;t=production.handle[i];++i) {
                    if (self.nullable(t)) n++;
                }
                if (n===i) { // production is nullable if all tokens are nullable
                    production.nullable = cont = true;
                }
            }
        });

        //check if each symbol is nullable
        for (var symbol in nonterminals) {
            if (!this.nullable(symbol)) {
                for (var i=0,production;production=nonterminals[symbol].productions.item(i);i++) {
                    if (production.nullable) nonterminals[symbol].nullable = cont = true;
                }
            }
        }
    }
};

// check if a token or series of tokens is nullable
// Accepts '' (epsilon), an array of symbols, or a single symbol name.
lookaheadMixin.nullable = function nullable (symbol) {
    // epsilon
    if (symbol === '') {
        return true;
    // RHS
    } else if (symbol instanceof Array) {
        for (var i=0,t;t=symbol[i];++i) {
            if (!this.nullable(t)) return false;
        }
        return true;
    // terminal
    } else if (!this.nonterminals[symbol]) {
        return false;
    // nonterminal
    } else {
        return this.nonterminals[symbol].nullable;
    }
};

// lookahead debug mixin
// Hook names are `before`/`after` + the method name; presumably dispatched by
// typal's mix — TODO confirm against ./util/typal.
var lookaheadDebug = {
    beforenullableSets: function () {
        this.trace("Computing Nullable sets.");
    },
    beforefirstSets: function () {
        this.trace("Computing First sets.");
    },
    beforefollowSets: function () {
        this.trace("Computing Follow sets.");
    },
    afterfollowSets: function () {
        var trace = this.trace;
        each(this.nonterminals, function (nt, t) {
            trace(nt, '\n');
        });
    }
};

/*
 * Mixin for common LR parser behavior
 * */
var lrGeneratorMixin = {};

// Builds the item-set collection, the parse table derived from it, and the
// default-action map, storing each on the generator.
lrGeneratorMixin.buildTable = function buildTable () {
    if (this.DEBUG) this.mix(lrGeneratorDebug); // mixin debug methods

    this.states = this.canonicalCollection();
    this.table = this.parseTable(this.states);
    this.defaultActions = findDefaults(this.table);
};

// An LR item: a production plus a dot position, with optional lookahead
// symbols (`follows`) and a predecessor reference.
lrGeneratorMixin.Item = typal.construct({
    constructor: function Item(production, dot, f, predecessor) {
        this.production = production;
        this.dotPosition = dot || 0;
        this.follows = f || [];
        this.predecessor = predecessor;
        // numeric id: "<production.id>a<dotPosition>" parsed as a base-36 number
        this.id = parseInt(production.id+'a'+this.dotPosition, 36);
        // symbol immediately after the dot (undefined when the dot is at the end)
        this.markedSymbol = this.production.handle[this.dotPosition];
    },
    // symbols after the marked symbol
    remainingHandle: function () {
        return this.production.handle.slice(this.dotPosition+1);
    },
    // items are equal when their ids match
    eq: function (e) {
        return e.id === this.id;
    },
    // handle rendered with a '.' prefixed to the symbol at the dot position
    handleToString: function () {
        var handle = this.production.handle.slice(0);
        handle[this.dotPosition] = '.'+(handle[this.dotPosition]||'');
        return handle.join(' ');
    },
    // "symbol -> handle-with-dot", plus the lookaheads when there are any
    toString: function () {
        var temp = this.production.handle.slice(0);
        temp[this.dotPosition] = '.'+(temp[this.dotPosition]||'');
        return this.production.symbol+" -> "+temp.join(' ') +
            (this.follows.length === 0 ? "" : " #lookaheads= "+this.follows.join(' '));
    }
});

// A Set of Items that also keeps a hash of member ids (hash_) for membership
// tests, plus per-state bookkeeping (reductions, edges, shifts, inadequate).
lrGeneratorMixin.ItemSet = Set.prototype.construct({
    afterconstructor: function () {
        this.reductions = [];
        this.goes = {};
        this.edges = {};
        this.shifts = false;
        this.inadequate = false;
        this.hash_ = {};
        for (var i=this._items.length-1;i >=0;i--) {
            this.hash_[this._items[i].id] = true; //i;
        }
    },
    // appends every item of `set` (an ItemSet-like object or a plain array), recording their ids
    concat: function concat (set) {
        var a = set._items || set;
        for (var i=a.length-1;i >=0;i--) {
            this.hash_[a[i].id] = true; //i;
        }
        this._items.push.apply(this._items, a);
        return this;
    },
    push: function (item) {
        this.hash_[item.id] = true;
        return this._items.push(item);
    },
    // true when an item with the same id was added; undefined otherwise
    contains: function (item) {
        return this.hash_[item.id];
    },
    // key string: the item ids, sorted with the default sort and joined by '|'.
    // The result is cached by replacing this.valueOf on first use.
    valueOf: function toValue () {
        var v = this._items.map(function (a) {return a.id;}).sort().join('|');
        this.valueOf = function toValue_inner() {return v;};
        return v;
    }
});

// Computes the closure of itemSet: repeatedly adds dot-position-0 items for
// every production of each nonterminal found after a dot, and records
// reductions / shifts / inadequacy on the resulting set.
lrGeneratorMixin.closureOperation = function closureOperation (itemSet /*, closureSet*/) {
    var closureSet = new this.ItemSet();
    var self = this;

    var set = itemSet,
        itemQueue, syms = {};

    do {
        itemQueue = new Set();
        closureSet.concat(set);
        set.forEach(function CO_set_forEach (item) {
            var symbol = item.markedSymbol;

            // if token is a non-terminal, recursively add closures
            if (symbol && self.nonterminals[symbol]) {
                // syms remembers nonterminals already expanded
                if(!syms[symbol]) {
                    self.nonterminals[symbol].productions.forEach(function CO_nt_forEach (production) {
                        var newItem = new self.Item(production, 0);
                        if(!closureSet.contains(newItem))
                            itemQueue.push(newItem);
                    });
                    syms[symbol] = true;
                }
            } else if (!symbol) {
                // reduction
                closureSet.reductions.push(item);
                closureSet.inadequate = closureSet.reductions.length > 1 || closureSet.shifts;
            } else {
                // shift
                closureSet.shifts = true;
                closureSet.inadequate = closureSet.reductions.length > 0;
            }
        });

        set = itemQueue;

    } while (!itemQueue.isEmpty());

    return closureSet;
};

// Advances the dot over `symbol` for every item of itemSet whose marked symbol
// matches, and returns the closure of the result (or the empty set as-is).
lrGeneratorMixin.gotoOperation = function gotoOperation (itemSet, symbol) {
    var gotoSet = new this.ItemSet(),
        self = this;

    itemSet.forEach(function goto_forEach(item, n) {
        if (item.markedSymbol === symbol) {
            gotoSet.push(new self.Item(item.production, item.dotPosition+1, item.follows, n));
        }
    });

    return gotoSet.isEmpty() ? gotoSet : this.closureOperation(gotoSet);
};

/* Create unique set of item sets
 * */
lrGeneratorMixin.canonicalCollection = function canonicalCollection () {
    // start from the closure of the first production with the dot at position 0
    var item1 = new this.Item(this.productions[0], 0, [this.EOF]);
    var firstState = this.closureOperation(new this.ItemSet(item1)),
        states = new Set(firstState),
        marked = 0,
        self = this,
        itemSet;

    // states.has maps a state key to its index in `states`
    states.has = {};
    states.has[firstState] = 0;

    // `marked` is the index of the next state to process; states appended by
    // canonicalCollectionInsert are picked up by later iterations
    while (marked !== states.size()) {
        itemSet = states.item(marked);
        marked++;
        itemSet.forEach(function CC_itemSet_forEach (item) {
            if (item.markedSymbol && item.markedSymbol !== self.EOF)
                self.canonicalCollectionInsert(item.markedSymbol, itemSet, states, marked-1);
        });
    }

    return states;
};

// Pushes a unique state into the que.
// Some parsing algorithms may perform additional operations
// Computes goto(itemSet, symbol); when the result is non-empty it is either
// appended to `states` as a new state or matched to an existing one via
// states.has, and the transition is recorded in itemSet.edges[symbol].
lrGeneratorMixin.canonicalCollectionInsert = function canonicalCollectionInsert (symbol, itemSet, states, stateNum) {
    var g = this.gotoOperation(itemSet, symbol);
    if (!g.predecessors)
        g.predecessors = {};
    // add g to que if not empty or duplicate
    if (!g.isEmpty()) {
        var gv = g.valueOf(),
            i = states.has[gv];
        if (i === -1 || typeof i === 'undefined') {
            states.has[gv] = states.size();
            itemSet.edges[symbol] = states.size(); // store goto transition for table
            states.push(g);
            g.predecessors[symbol] = [stateNum];
        } else {
            itemSet.edges[symbol] = i; // store goto transition for table
            states.item(i).predecessors[symbol].push(stateNum);
        }
    }
};

// Action value used by resolveConflict for "no action" (non-associative operator)
var NONASSOC = 0;

// Builds the parse table: one object per item set, keyed by symbol id.
// Entries are a goto state number (nonterminals), [1, state] for shift,
// [2, productionId] for reduce, or [3] for accept.
lrGeneratorMixin.parseTable = function parseTable (itemSets) {
    var states = [],
        nonterminals = this.nonterminals,
        operators = this.operators,
        conflictedStates = {}, // state number -> true for states with an unresolved conflict
        self = this,
        s = 1, // shift
        r = 2, // reduce
        a = 3; // accept

    // for each item set
    itemSets.forEach(function (itemSet, k) {
        var state = states[k] = {};
        var action, stackSymbol;

        // set shift and goto actions
        for (stackSymbol in itemSet.edges) {
            itemSet.forEach(function (item, j) {
                // find shift and goto actions
                if (item.markedSymbol == stackSymbol) {
                    var gotoState = itemSet.edges[stackSymbol];
                    if (nonterminals[stackSymbol]) {
                        // store state to go to after a reduce
                        //self.trace(k, stackSymbol, 'g'+gotoState);
                        state[self.symbols_[stackSymbol]] = gotoState;
                    } else {
                        //self.trace(k, stackSymbol, 's'+gotoState);
                        state[self.symbols_[stackSymbol]] = [s,gotoState];
                    }
                }
            });
        }

        // set accept action
        itemSet.forEach(function (item, j) {
            if (item.markedSymbol == self.EOF) {
                // accept
                state[self.symbols_[self.EOF]] = [a];
                //self.trace(k, self.EOF, state[self.EOF]);
            }
        });

        var allterms = self.lookAheads ? false : self.terminals;

        // set reductions and resolve potential conflicts
        itemSet.reductions.forEach(function (item, j) {
            // if parser uses lookahead, only enumerate those terminals
            var terminals = allterms || self.lookAheads(itemSet, item);

            terminals.forEach(function (stackSymbol) {
                action = state[self.symbols_[stackSymbol]];
                var op = operators[stackSymbol];

                // Reading a terminal and current position is at the end of a production, try to reduce
                // (the original test `action || action && action.length` is equivalent to `action`)
                if (action) {
                    var sol = resolveConflict(item.production, op, [r,item.production.id], action[0] instanceof Array ? action[0] : action);
                    self.resolutions.push([k,stackSymbol,sol]);
                    if (sol.bydefault) {
                        self.conflicts++;
                        if (!self.DEBUG) {
                            self.warn('Conflict in grammar: multiple actions possible when lookahead token is ',stackSymbol,' in state ',k, "\n- ", printAction(sol.r, self), "\n- ", printAction(sol.s, self));
                            conflictedStates[k] = true;
                        }
                        if (self.options.noDefaultResolve) {
                            // keep both actions as a list instead of picking one
                            if (!(action[0] instanceof Array))
                                action = [action];
                            action.push(sol.r);
                        }
                    } else {
                        action = sol.action;
                    }
                } else {
                    action = [r,item.production.id];
                }
                if (action && action.length) {
                    state[self.symbols_[stackSymbol]] = action;
                } else if (action === NONASSOC) {
                    state[self.symbols_[stackSymbol]] = undefined;
                }
            });
        });

    });

    if (!self.DEBUG && self.conflicts > 0) {
        self.warn("\nStates with conflicts:");
        each(conflictedStates, function (val, state) {
            self.warn('State '+state);
            self.warn(' ',itemSets.item(state).join("\n "));
        });
    }

    return states;
};

// find states with only one action, a reduction
// Returns a map from state index to that single reduce action.
function findDefaults (states) {
    var defaults = {};
    states.forEach(function (state, k) {
        var count = 0;
        var onlyKey;
        for (var act in state) {
            if ({}.hasOwnProperty.call(state, act)) {
                count++;
                // remember the own key explicitly rather than relying on the
                // loop variable's value after the loop has finished
                onlyKey = act;
            }
        }

        // parseTable may store `undefined` for a non-associative conflict, so
        // make sure the entry exists before inspecting its action code
        if (count === 1 && state[onlyKey] && state[onlyKey][0] === 2) {
            // only one action in state and it's a reduction
            defaults[k] = state[onlyKey];
        }
    });

    return defaults;
}

// resolves shift-reduce and reduce-reduce conflicts
// Returns a solution object {production, operator, r, s, msg, action} and sets
// `bydefault` when the choice was not decided by precedence/associativity.
function resolveConflict (production, op, reduce, shift) {
    var sln = {production: production, operator: op, r: reduce, s: shift},
        r = 2; // reduce

    if (shift[0] === r) {
        // the existing action is itself a reduce: reduce/reduce conflict
        sln.msg = "Resolve R/R conflict (use first production declared in grammar.)";
        sln.action = shift[1] < reduce[1] ? shift : reduce;
        if (shift[1] !== reduce[1]) sln.bydefault = true;
        return sln;
    }

    if (production.precedence === 0 || !op) {
        sln.msg = "Resolve S/R conflict (shift by default.)";
        sln.bydefault = true;
        sln.action = shift;
    } else if (production.precedence < op.precedence ) {
        sln.msg = "Resolve S/R conflict (shift for higher precedent operator.)";
        sln.action = shift;
    } else if (production.precedence === op.precedence) {
        if (op.assoc === "right" ) {
            sln.msg = "Resolve S/R conflict (shift for right associative operator.)";
            sln.action = shift;
        } else if (op.assoc === "left" ) {
            sln.msg = "Resolve S/R conflict (reduce for left associative operator.)";
            sln.action = reduce;
        } else if (op.assoc === "nonassoc" ) {
            sln.msg = "Resolve S/R conflict (no action for non-associative operator.)";
            sln.action = NONASSOC;
        }
    } else {
        sln.msg = "Resolve conflict (reduce for higher precedent production.)";
        sln.action = reduce;
    }

    return sln;
}

// Generates parser source in the format selected by opt.moduleType
// ("js", "amd", anything else -> CommonJS).
lrGeneratorMixin.generate = function parser_generate (opt) {
    opt = typal.mix.call({}, this.options, opt);
    var code = "";

    // check for illegal identifier
    if (!opt.moduleName || !opt.moduleName.match(/^[A-Za-z_$][A-Za-z0-9_$]*$/)) {
        opt.moduleName = "parser";
    }
    switch (opt.moduleType) {
        case "js":
            code = this.generateModule(opt);
            break;
        case "amd":
            code = this.generateAMDModule(opt);
            break;
        default:
            code = this.generateCommonJSModule(opt);
            break;
    }

    return code;
};

// Wraps the generated parser in a `define(function(require){ ... })` call.
lrGeneratorMixin.generateAMDModule = function generateAMDModule(opt){
    opt = typal.mix.call({}, this.options, opt);
    var module = this.generateModule_();
    var out = '\n\ndefine(function(require){\n'
        + module.commonCode
        + '\nvar parser = '+ module.moduleCode
        + "\n"+this.moduleInclude
        + (this.lexer && this.lexer.generateModule ?
          '\n' + this.lexer.generateModule() + '\nparser.lexer = lexer;' : '')
        + '\nreturn parser;'
        + '\n});';
    return out;
};

// generateModule output followed by a guarded block that assigns
// exports.parser, exports.Parser and exports.parse.
lrGeneratorMixin.generateCommonJSModule = function generateCommonJSModule (opt) {
    opt = typal.mix.call({}, this.options, opt);
    var moduleName = opt.moduleName || "parser";
    var out = this.generateModule(opt)
        + "\n\n\nif (typeof require !== 'undefined' && typeof exports !== 'undefined') {"
        + "\nexports.parser = "+moduleName+";"
        + "\nexports.Parser = "+moduleName+".Parser;"
        + "\nexports.parse = function () { return "+moduleName+".parse.apply("+moduleName+", arguments); };"
        + "\n}";

    return out;
};

// Emits `var <moduleName> = <module expression>`; the `var` is omitted when
// moduleName contains a dot.
lrGeneratorMixin.generateModule = function generateModule (opt) {
    opt = typal.mix.call({}, this.options, opt);
    var moduleName = opt.moduleName || "parser";
    var out = "/* parser generated by jison-fork */\n";

    out += (moduleName.match(/\./) ? moduleName : "var "+moduleName) +
            " = " + this.generateModuleExpr();

    return out;
};

// Builds the self-invoking function expression that creates the parser object,
// attaches the lexer (when it can generate itself) and returns `new Parser`.
lrGeneratorMixin.generateModuleExpr = function generateModuleExpr () {
    var out = '';
    var module = this.generateModule_();

    out += "(function(){\n";
    out += module.commonCode;
    out += "\nvar parser = "+module.moduleCode;
    out += "\n"+this.moduleInclude;
    if (this.lexer && this.lexer.generateModule) {
        out += this.lexer.generateModule();
        out += "\nparser.lexer = lexer;";
    }
    out += "\nfunction Parser () {\n this.yy = {};\n}\n"
        + "Parser.prototype = parser;"
        + "parser.Parser = Parser;"
        + "\nreturn new Parser;\n})();";

    return out;
};

// Currently an identity function: returns fn unchanged.
function addTokenStack (fn) {
    return fn;
}

// lex function that supports token stacks
// Reads `tstack`, `lexer`, `EOF` and `self` as free variables; it is not
// called anywhere in the code visible here.
function tokenStackLex() {
    var token;
    token = tstack.pop() || lexer.lex() || EOF;
    // if token isn't its numeric value, convert
    if (typeof token !== 'number') {
        if (token instanceof Array) {
            tstack = token;
            token = tstack.pop();
        }
        token = self.symbols_[token] || token;
    }
    return token;
}

// Generates the code of the parser module, which consists of two parts:
// - module.commonCode: initialization code that should be placed
// before the module
// - module.moduleCode: code that creates the module object
lrGeneratorMixin.generateModule_ = function generateModule_ () {
    // the parse function is emitted as source text
    var parseFn = String(parser.parse);
    // if (!this.hasErrorRecovery) {
    //     parseFn = removeErrorRecovery(parseFn);
    // }

    // Generate code with fresh variable names
    nextVariableId = 0;
    var tableCode = this.generateTableCode(this.table);

    // Generate the initialization code
    var commonCode = tableCode.commonCode;

    // Generate the module creation code
    // (an object literal assembled as text; numeric keys are unquoted where a replace is applied)
    var moduleCode = "{";
    moduleCode += [
        "trace: " + String(this.trace || parser.trace),
        "yy: {}",
        "symbols_: " + JSON.stringify(this.symbols_),
        "terminals_: " + JSON.stringify(this.terminals_).replace(/"([0-9]+)":/g,"$1:"),
        "productions_: " + JSON.stringify(this.productions_),
        "performAction: " + String(this.performAction),
        "table: " + tableCode.moduleCode,
        "defaultActions: " + JSON.stringify(this.defaultActions).replace(/"([0-9]+)":/g,"$1:"),
        "parseError: " + String(this.parseError || (this.hasErrorRecovery ? traceParseError : parser.parseError)),
        "parse: " + parseFn
    ].join(",\n");
    moduleCode += "};";

    return { commonCode: commonCode, moduleCode: moduleCode }
};

// Generate code that represents the specified parser table
// Returns { commonCode, moduleCode }: commonCode is a `var` statement declaring
// the helper `o` and the shared list variables, moduleCode is the table
// expression that uses them.
lrGeneratorMixin.generateTableCode = function (table) {
    var moduleCode = JSON.stringify(table);
    var variables = [createObjectCode];

    // Don't surround numerical property name numbers in quotes
    moduleCode = moduleCode.replace(/"([0-9]+)"(?=:)/g, "$1");

    // Replace objects with several identical values by function calls
    // e.g., { 1: [6, 7]; 3: [6, 7], 4: [6, 7], 5: 8 } = o([1, 3, 4], [6, 7], { 5: 8 })
    moduleCode = moduleCode.replace(/\{\d+:[^\}]+,\d+:[^\}]+\}/g, function (object) {
        // Find the value that occurs with the highest number of keys
        var value, frequentValue, key, keys = {}, keyCount, maxKeyCount = 0,
            keyValue, keyValues = [], keyValueMatcher = /(\d+):([^:]+)(?=,\d+:|\})/g;

        while ((keyValue = keyValueMatcher.exec(object))) {
            // For each value, store the keys where that value occurs
            key = keyValue[1];
            value = keyValue[2];
            keyCount = 1;

            if (!(value in keys)) {
                keys[value] = [key];
            } else {
                // Array#push returns the new length, i.e. the key count for this value
                keyCount = keys[value].push(key);
            }
            // Remember this value if it is the most frequent one
            if (keyCount > maxKeyCount) {
                maxKeyCount = keyCount;
                frequentValue = value;
            }
        }
        // Construct the object with a function call if the most frequent value occurs multiple times
        if (maxKeyCount > 1) {
            // Collect all non-frequent values into a remainder object
            for (value in keys) {
                if (value !== frequentValue) {
                    for (var k = keys[value], i = 0, l = k.length; i < l; i++) {
                        keyValues.push(k[i] + ':' + value);
                    }
                }
            }
            keyValues = keyValues.length ? ',{' + keyValues.join(',') + '}' : '';
            // Create the function call `o(keys, value, remainder)`
            object = 'o([' + keys[frequentValue].join(',') + '],' + frequentValue + keyValues + ')';
        }
        return object;
    });

    // Count occurrences of number lists
    var list;
    var lists = {};
    var listMatcher = /\[[0-9,]+\]/g;

    while (list = listMatcher.exec(moduleCode)) {
        lists[list] = (lists[list] || 0) + 1;
    }

    // Replace frequently occurring number lists with variables
    moduleCode = moduleCode.replace(listMatcher, function (list) {
        var listId = lists[list];
        // If listId is a number, it represents the list's occurrence frequency
        if (typeof listId === 'number') {
            // If the list does not occur frequently, represent it by the list
            if (listId === 1) {
                lists[list] = listId = list;
            // If the list occurs frequently, represent it by a newly assigned variable
            } else {
                lists[list] = listId = createVariable();
                variables.push(listId + '=' + list);
            }
        }
        return listId;
    });

    // Return the variable initialization code and the table code
    return {
        commonCode: 'var ' + variables.join(',') + ';',
        moduleCode: moduleCode
    };
};

// Function that extends an object with the given value for all given keys
// e.g., o([1, 3, 4], [6, 7], { x: 1, y: 2 }) = { 1: [6, 7]; 3: [6, 7], 4: [6, 7], x: 1, y: 2 }
// (kept as source text because it is emitted into the generated module)
var createObjectCode = 'o=function(k,v,o,l){' +
    'for(o=o||{},l=k.length;l--;o[k[l]]=v);' +
    'return o}';

// Creates a variable with a unique name
// The name is '$V' followed by the digits of the running counter written in
// base variableTokensLength, least-significant digit first.
function createVariable() {
    var id = nextVariableId++;
    var name = '$V';

    do {
        name += variableTokens[id % variableTokensLength];
        id = ~~(id / variableTokensLength);
    } while (id !== 0);

    return name;
}

// counter and alphabet used by createVariable
var nextVariableId = 0;
var variableTokens = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_$';
var variableTokensLength = variableTokens.length;

// debug mixin for LR parser generators

// Human-readable description of a table action: code 1 is a shift,
// code 2 a reduce, anything else is reported as accept.
function printAction (a, gen) {
    var s = a[0] == 1 ? 'shift token (then go to state '+a[1]+')' :
        a[0] == 2 ? 'reduce by rule: '+gen.productions[a[1]] :
                    'accept' ;

    return s;
}

var lrGeneratorDebug = {
    beforeparseTable: function () {
        this.trace("Building parse table.");
    },
    afterparseTable: function () {
        var self = this;
        if (this.conflicts > 0) {
            // report only the conflicts that were resolved by default
            this.resolutions.forEach(function (r, i) {
                if (r[2].bydefault) {
                    self.warn('Conflict at state: ',r[0], ', token: ',r[1], "\n ", printAction(r[2].r, self), "\n ", printAction(r[2].s, self));
                }
            });
            this.trace("\n"+this.conflicts+" Conflict(s) found in grammar.");
        }
        this.trace("Done.");
    },
    aftercanonicalCollection: function (states) {
        var trace = this.trace;
        trace("\nItem sets\n------");

        states.forEach(function (state, i) {
            trace("\nitem set",i,"\n"+state.join("\n"), '\ntransitions -> ', JSON.stringify(state.edges));
        });
    }
};

var parser = typal.beget();

// Evaluates the generated module expression to obtain a live parser object and
// attaches generator-backed `generate*` methods to it.
lrGeneratorMixin.createParser = function createParser () {

    // NOTE(review): eval of generated source; the text includes grammar-supplied
    // sections such as moduleInclude, so the grammar must be trusted.
    var p = eval(this.generateModuleExpr());

    // for debugging
    p.productions = this.productions;

    var self = this;
    // returns a function that copies p.lexer onto the generator, then forwards to self[method]
    function bind(method) {
        return function() {
            self.lexer = p.lexer;
            return self[method].apply(self, arguments);
        };
    }

    // backwards compatability
    p.generate = bind('generate');
    p.generateAMDModule = bind('generateAMDModule');
    p.generateModule = bind('generateModule');
    p.generateCommonJSModule = bind('generateCommonJSModule');

    return p;
};

// the parser prototype reuses the generator's trace/warn/error helpers
parser.trace = generator.trace;
parser.warn = generator.warn;
parser.error = generator.error;
/**
 * Parse-error handler that only reports the message through `this.trace`
 * and never throws.
 *
 * @param {string} err    Error message to trace.
 * @param {Object} [hash] Error details; not used by this handler.
 */
function traceParseError (err, hash) {
    this.trace(err);
}

/**
 * Default parse-error handler, installed on both `parser` and `lrGeneratorMixin`.
 *
 * @param {string} str    Error message.
 * @param {Object} [hash] Error details. When `hash.recoverable` is truthy the
 *                        message is only traced; otherwise an Error is thrown.
 * @throws {Error} For non-recoverable errors (or when no details are given).
 *                 The details object is exposed as `error.hash`.
 */
function parseError (str, hash) {
    if (hash && hash.recoverable) {
        this.trace(str);
    } else {
        var error = new Error(str);
        error.hash = hash;
        throw error;
    }
}

parser.parseError = lrGeneratorMixin.parseError = parseError;

/**
 * Table-driven LR parse loop.
 *
 * @param {*}      input         Source handed to `lexer.setInput`.
 * @param {Object} [script=null] Optional object providing `addDiagnostic` and
 *                               `rangeAt`; when given, a diagnostic is added
 *                               and raised for a syntax error.
 * @returns {*} The first non-undefined value returned by a semantic action,
 *              or `true` when the accept action is reached.
 * @throws {Error} On a syntax error that cannot be recovered from.
 */
parser.parse = function parse (input, script = null) {
    // For Imba we are going to drop most of the features that are not used
    // Locations are provided by the tokens from the lexer directly - so drop yylloc
    // We dont really need the shared state (it seems)
    var self = this,
        stack = [0],      // parse stack: state, then (symbol, state) pairs
        tstack = [],      // token stack
        vstack = [null],  // semantic value stack
        table = this.table,
        yytext = '',
        yylineno = 0,
        yyleng = 0,
        recovering = 0,   // > 0 while recovering from a syntax error
        TERROR = 2,       // symbol id of the generic error token
        EOF = 1;          // symbol id of end of input

    var lexer = Object.create(this.lexer);
    var yy = this.yy;

    lexer.setInput(input,yy);

    if (typeof yy.parseError === 'function') {
        this.parseError = yy.parseError;
    } else {
        this.parseError = Object.getPrototypeOf(this).parseError;
    }

    // Drops n (symbol, state) pairs and their n semantic values.
    function popStack (n) {
        stack.length = stack.length - 2 * n;
        vstack.length = vstack.length - n;
    }

    var symbol, preErrorSymbol, state, action, a, r, yyval = {}, p, len, newState, expected;

    // Reports a syntax error and, when an error rule is reachable, rewinds
    // the stack so parsing can resume with the error token as lookahead.
    function handleError(){
        var error_rule_depth;
        var errStr = '';

        // Return the rule stack depth where the nearest error recovery rule can be found.
        // Return FALSE when no error recovery rule was found.
        function locateNearestErrorRecoveryRule(state) {
            var stack_probe = stack.length - 1;
            var depth = 0;

            // try to recover from error
            for(;;) {
                // check for error recovery rule in this state
                if ((TERROR.toString()) in table[state]) {
                    return depth;
                }
                if (state === 0 || stack_probe < 2) {
                    return false; // No suitable error recovery rule available.
                }
                stack_probe -= 2; // popStack(1): [symbol, action]
                state = stack[stack_probe];
                ++depth;
            }
        }

        if (!recovering) {
            // first see if there's any chance at hitting an error recovery rule:
            error_rule_depth = locateNearestErrorRecoveryRule(state);

            // Report error: collect the terminals that have an action in the
            // current state so the message and hash can list them.
            expected = [];
            Object.keys(table[state]).forEach(function (id) {
                if (self.terminals_[id] && Number(id) > TERROR) {
                    expected.push("'" + self.terminals_[id] + "'");
                }
            });

            var tsym = lexer.yytext;
            var tok = self.terminals_[symbol] || symbol;

            // Find closest non-generated token
            let tidx = lexer.tokens.indexOf(tsym);
            let ttok = tsym;

            while(ttok && ttok._loc == -1){
                ttok = lexer.tokens[--tidx];
            }

            var tloc = ttok ? ttok._loc : -1;
            var tend = tloc > -1 ? (tloc + (ttok._len || 0)) : -1;
            var tpos = tloc != -1 ? "[" + ttok._loc + ":" + ttok._len + "]" : '[0:0]';

            if (lexer.showPosition) {
                errStr = 'Parse error at '+(tpos)+":\n"+lexer.showPosition()+"\nExpecting "+expected.join(', ') + ", got '" + (tok)+ "'";
            } else {
                errStr = "Unexpected " + (symbol == EOF ? "end of input" : ("'"+(tok)+"'"));
            }

            if(script){
                let err = script.addDiagnostic('error',{
                    message: errStr,
                    source: 'imba-parser',
                    range: script.rangeAt(tloc,tend)
                })

                err.raise();
            }

            self.parseError(errStr, {
                lexer: lexer,
                text: lexer.match,
                token: tok,
                offset: tloc,
                length: (tend - tloc),
                start: {offset: tloc},
                end: {offset: tend},
                line: lexer.yylineno,
                expected: expected,
                recoverable: (error_rule_depth !== false)
            });
        } else if (preErrorSymbol !== EOF) {
            error_rule_depth = locateNearestErrorRecoveryRule(state);
        }

        // just recovered from another error
        if (recovering == 3) {
            if (symbol === EOF || preErrorSymbol === EOF) {
                throw new Error(errStr || 'Parsing halted while starting to recover from another error.');
            }

            // discard current lookahead and grab another; without reading a
            // new token the same offending token would be retried forever
            yytext = lexer.yytext;
            symbol = self.symbols_[lexer.lex()] || EOF;
        }

        // try to recover from error
        if (error_rule_depth === false) {
            throw new Error(errStr || 'Parsing halted. No suitable error recovery rule available.');
        }
        popStack(error_rule_depth);

        preErrorSymbol = (symbol == TERROR ? null : symbol); // save the lookahead token
        symbol = TERROR;         // insert generic error symbol as new lookahead
        state = stack[stack.length-1];
        action = table[state] && table[state][TERROR];
        recovering = 3; // allow 3 real symbols to be shifted before reporting a new error
    }

    var __sym = this.symbols_;
    var __prod = this.productions_;

    while (true) {
        // retrieve state number from top of stack
        state = stack[stack.length - 1];

        if (symbol === null || typeof symbol == 'undefined') {
            symbol = __sym[lexer.lex()] || EOF;
        }
        action = table[state] && table[state][symbol];

        // The label is kept on purpose: the source of this function may be
        // inspected by tooling that looks for it.
        _handle_error:
        if (typeof action === 'undefined' || !action.length || !action[0]) {
            handleError();
        }

        switch (action[0]) {
            case 1: // shift
                stack.push(symbol);
                stack.push(action[1]); // push state
                vstack.push(lexer.yytext);

                symbol = null;
                if (!preErrorSymbol) { // normal execution/no error
                    yytext = lexer.yytext;
                    if (recovering > 0) {
                        recovering--;
                    }
                } else {
                    // error just occurred, resume old lookahead f/ before error
                    symbol = preErrorSymbol;
                    preErrorSymbol = null;
                }
                break;

            case 2: // reduce
                // __prod[id] is [resulting symbol, number of items to pop]
                len = __prod[action[1]][1];

                // perform semantic action; default result is the first item's value
                yyval.$ = vstack[vstack.length-len];
                r = this.performAction(yyval, yytext, yy, action[1], vstack);
                if (typeof r !== 'undefined') {
                    return r;
                }

                while(len > 0) {
                    stack.pop();
                    stack.pop();
                    vstack.pop();
                    len--;
                }

                stack.push(__prod[action[1]][0]);
                // goto: look up the next state from the uncovered state and the new symbol
                newState = table[stack[stack.length-2]][stack[stack.length-1]];
                stack.push(newState);
                vstack.push(yyval.$);
                break;

            case 3: // accept
                return true;
        }
    }
};

/**
 * Installs the generated tables and action function on this parser.
 *
 * @param {Object} dict Object providing `table`, `defaultActions`,
 *                      `performAction`, `productions_`, `symbols_` and `terminals_`.
 */
parser.init = function parser_init (dict) {
    this.table = dict.table;
    this.defaultActions = dict.defaultActions;
    this.performAction = dict.performAction;
    this.productions_ = dict.productions_;
    this.symbols_ = dict.symbols_;
    this.terminals_ = dict.terminals_;
};

/*
 * LR(0) Parser
 * */

var lr0 = generator.beget(lookaheadMixin, lrGeneratorMixin, {
    type: "LR(0)",
    // Builds the parse table as soon as the generator has been constructed.
    afterconstructor: function lr0_afterconstructor () {
        this.buildTable();
    }
});

var LR0Generator = exports.LR0Generator = lr0.construct();
/* * Simple LALR(1) * */ var lalr = generator.beget(lookaheadMixin, lrGeneratorMixin, { type: "LALR(1)", afterconstructor: function (grammar, options) { if (this.DEBUG) this.mix(lrGeneratorDebug, lalrGeneratorDebug); // mixin debug methods options = options || {}; this.states = this.canonicalCollection(); this.terms_ = {}; var newg = this.newg = typal.beget(lookaheadMixin,{ oldg: this, trace: this.trace, nterms_: {}, DEBUG: false, go_: function (r, B) { r = r.split(":")[0]; // grab state # B = B.map(function (b) { return b.slice(b.indexOf(":")+1); }); return this.oldg.go(r, B); } }); newg.nonterminals = {}; newg.productions = []; this.inadequateStates = []; // if true, only lookaheads in inadequate states are computed (faster, larger table) // if false, lookaheads for all reductions will be computed (slower, smaller table) this.onDemandLookahead = options.onDemandLookahead || false; this.buildNewGrammar(); newg.computeLookaheads(); this.unionLookaheads(); this.table = this.parseTable(this.states); this.defaultActions = findDefaults(this.table); }, lookAheads: function LALR_lookaheads (state, item) { return (!!this.onDemandLookahead && !state.inadequate) ? this.terminals : item.follows; }, go: function LALR_go (p, w) { var q = parseInt(p, 10); for (var i=0;i