You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
444 lines
19 KiB
444 lines
19 KiB
2 years ago
|
/*---------------------------------------------------------------------------------------------
|
||
|
* Copyright (c) Microsoft Corporation. All rights reserved.
|
||
|
* Licensed under the MIT License. See License.txt in the project root for license information.
|
||
|
*--------------------------------------------------------------------------------------------*/
|
||
|
'use strict';
|
||
|
/**
 * Creates a JSON scanner on the given text.
 * If ignoreTrivia is set, whitespaces or comments are ignored.
 *
 * The scanner is tolerant: malformed input never throws. Problems are reported
 * through `getTokenError()` for the token that was just scanned.
 *
 * @param {string} text - The text to tokenize.
 * @param {boolean} [ignoreTrivia=false] - When true, `scan()` skips whitespace,
 *   line-break and comment tokens (kinds 12 through 15).
 * @returns {object} A scanner exposing `setPosition`, `getPosition`, `scan`,
 *   `getToken`, `getTokenValue`, `getTokenOffset`, `getTokenLength`,
 *   `getTokenStartLine`, `getTokenStartCharacter` and `getTokenError`.
 */
export function createScanner(text, ignoreTrivia = false) {
    const len = text.length;
    let pos = 0;
    let value = '';
    let tokenOffset = 0;
    let token = 16 /* SyntaxKind.Unknown */;
    // Number of line breaks consumed so far.
    let lineNumber = 0;
    // Despite its name this holds the line number on which the current token starts.
    let lineStartOffset = 0;
    // Offset just past the most recently consumed line break.
    let tokenLineStartOffset = 0;
    // Snapshot of tokenLineStartOffset taken when the current token started.
    let prevTokenLineStartOffset = 0;
    let scanError = 0 /* ScanError.None */;

    /**
     * Reads hexadecimal digits at the current position and advances past them.
     *
     * @param {number} count - Minimum number of digits required.
     * @param {boolean} exact - When true, stop after `count` digits; otherwise
     *   keep reading for as long as hex digits follow.
     * @returns {number} The parsed value, or -1 if fewer than `count` digits were found.
     */
    function scanHexDigits(count, exact) {
        let digits = 0;
        let hexValue = 0;
        while (digits < count || !exact) {
            // Past the end of the text charCodeAt yields NaN, which ends the loop.
            const ch = text.charCodeAt(pos);
            if (ch >= 48 /* CharacterCodes._0 */ && ch <= 57 /* CharacterCodes._9 */) {
                hexValue = hexValue * 16 + ch - 48 /* CharacterCodes._0 */;
            }
            else if (ch >= 65 /* CharacterCodes.A */ && ch <= 70 /* CharacterCodes.F */) {
                hexValue = hexValue * 16 + ch - 65 /* CharacterCodes.A */ + 10;
            }
            else if (ch >= 97 /* CharacterCodes.a */ && ch <= 102 /* CharacterCodes.f */) {
                hexValue = hexValue * 16 + ch - 97 /* CharacterCodes.a */ + 10;
            }
            else {
                break;
            }
            pos++;
            digits++;
        }
        if (digits < count) {
            hexValue = -1;
        }
        return hexValue;
    }

    /**
     * Moves the scanner to `newPosition` and clears the current token state.
     * Line tracking (line number / line start offset) is left as it was.
     *
     * @param {number} newPosition - Offset in the text to continue scanning from.
     */
    function setPosition(newPosition) {
        pos = newPosition;
        value = '';
        tokenOffset = 0;
        token = 16 /* SyntaxKind.Unknown */;
        scanError = 0 /* ScanError.None */;
    }

    /**
     * Scans the unsigned part of a number starting at the current position
     * (the caller has already consumed a leading minus, if any).
     * Sets scanError to UnexpectedEndOfNumber when a '.' or an exponent marker
     * is not followed by a digit.
     *
     * @returns {string} The number text that was recognised.
     */
    function scanNumber() {
        const start = pos;
        if (text.charCodeAt(pos) === 48 /* CharacterCodes._0 */) {
            // A leading zero stands alone; further digits are not part of this number.
            pos++;
        }
        else {
            pos++;
            while (pos < len && isDigit(text.charCodeAt(pos))) {
                pos++;
            }
        }
        if (pos < len && text.charCodeAt(pos) === 46 /* CharacterCodes.dot */) {
            pos++;
            if (pos < len && isDigit(text.charCodeAt(pos))) {
                pos++;
                while (pos < len && isDigit(text.charCodeAt(pos))) {
                    pos++;
                }
            }
            else {
                scanError = 3 /* ScanError.UnexpectedEndOfNumber */;
                return text.substring(start, pos);
            }
        }
        let end = pos;
        if (pos < len && (text.charCodeAt(pos) === 69 /* CharacterCodes.E */ || text.charCodeAt(pos) === 101 /* CharacterCodes.e */)) {
            pos++;
            if (pos < len && (text.charCodeAt(pos) === 43 /* CharacterCodes.plus */ || text.charCodeAt(pos) === 45 /* CharacterCodes.minus */)) {
                pos++;
            }
            if (pos < len && isDigit(text.charCodeAt(pos))) {
                pos++;
                while (pos < len && isDigit(text.charCodeAt(pos))) {
                    pos++;
                }
                end = pos;
            }
            else {
                // The value stops before the dangling exponent, but pos stays past it.
                scanError = 3 /* ScanError.UnexpectedEndOfNumber */;
            }
        }
        return text.substring(start, end);
    }

    /**
     * Scans a string body; the opening double quote has already been consumed.
     * Escape sequences are decoded. Errors (unterminated string, bad escape,
     * bad unicode escape, raw control character) are recorded in scanError.
     *
     * @returns {string} The decoded string content without the quotes.
     */
    function scanString() {
        let result = '';
        let start = pos;
        while (true) {
            if (pos >= len) {
                result += text.substring(start, pos);
                scanError = 2 /* ScanError.UnexpectedEndOfString */;
                break;
            }
            const ch = text.charCodeAt(pos);
            if (ch === 34 /* CharacterCodes.doubleQuote */) {
                result += text.substring(start, pos);
                pos++;
                break;
            }
            if (ch === 92 /* CharacterCodes.backslash */) {
                result += text.substring(start, pos);
                pos++;
                if (pos >= len) {
                    scanError = 2 /* ScanError.UnexpectedEndOfString */;
                    break;
                }
                const ch2 = text.charCodeAt(pos++);
                switch (ch2) {
                    case 34 /* CharacterCodes.doubleQuote */:
                        result += '\"';
                        break;
                    case 92 /* CharacterCodes.backslash */:
                        result += '\\';
                        break;
                    case 47 /* CharacterCodes.slash */:
                        result += '/';
                        break;
                    case 98 /* CharacterCodes.b */:
                        result += '\b';
                        break;
                    case 102 /* CharacterCodes.f */:
                        result += '\f';
                        break;
                    case 110 /* CharacterCodes.n */:
                        result += '\n';
                        break;
                    case 114 /* CharacterCodes.r */:
                        result += '\r';
                        break;
                    case 116 /* CharacterCodes.t */:
                        result += '\t';
                        break;
                    case 117 /* CharacterCodes.u */: {
                        const ch3 = scanHexDigits(4, true);
                        if (ch3 >= 0) {
                            result += String.fromCharCode(ch3);
                        }
                        else {
                            scanError = 4 /* ScanError.InvalidUnicode */;
                        }
                        break;
                    }
                    default:
                        scanError = 5 /* ScanError.InvalidEscapeCharacter */;
                }
                start = pos;
                continue;
            }
            if (ch >= 0 && ch <= 0x1f) {
                if (isLineBreak(ch)) {
                    result += text.substring(start, pos);
                    scanError = 2 /* ScanError.UnexpectedEndOfString */;
                    break;
                }
                else {
                    scanError = 6 /* ScanError.InvalidCharacter */;
                    // mark as error but continue with string
                }
            }
            pos++;
        }
        return result;
    }

    /**
     * Scans the next token, trivia included, and updates the token state.
     *
     * @returns {number} The kind of the token that was scanned.
     */
    function scanNext() {
        value = '';
        scanError = 0 /* ScanError.None */;
        tokenOffset = pos;
        lineStartOffset = lineNumber;
        prevTokenLineStartOffset = tokenLineStartOffset;
        if (pos >= len) {
            // at the end
            tokenOffset = len;
            return token = 17 /* SyntaxKind.EOF */;
        }
        let code = text.charCodeAt(pos);
        // trivia: whitespace
        if (isWhiteSpace(code)) {
            do {
                pos++;
                value += String.fromCharCode(code);
                code = text.charCodeAt(pos);
            } while (isWhiteSpace(code));
            return token = 15 /* SyntaxKind.Trivia */;
        }
        // trivia: newlines
        if (isLineBreak(code)) {
            pos++;
            value += String.fromCharCode(code);
            if (code === 13 /* CharacterCodes.carriageReturn */ && text.charCodeAt(pos) === 10 /* CharacterCodes.lineFeed */) {
                // CR LF counts as a single line break.
                pos++;
                value += '\n';
            }
            lineNumber++;
            tokenLineStartOffset = pos;
            return token = 14 /* SyntaxKind.LineBreakTrivia */;
        }
        switch (code) {
            // tokens: []{}:,
            case 123 /* CharacterCodes.openBrace */:
                pos++;
                return token = 1 /* SyntaxKind.OpenBraceToken */;
            case 125 /* CharacterCodes.closeBrace */:
                pos++;
                return token = 2 /* SyntaxKind.CloseBraceToken */;
            case 91 /* CharacterCodes.openBracket */:
                pos++;
                return token = 3 /* SyntaxKind.OpenBracketToken */;
            case 93 /* CharacterCodes.closeBracket */:
                pos++;
                return token = 4 /* SyntaxKind.CloseBracketToken */;
            case 58 /* CharacterCodes.colon */:
                pos++;
                return token = 6 /* SyntaxKind.ColonToken */;
            case 44 /* CharacterCodes.comma */:
                pos++;
                return token = 5 /* SyntaxKind.CommaToken */;
            // strings
            case 34 /* CharacterCodes.doubleQuote */:
                pos++;
                value = scanString();
                return token = 10 /* SyntaxKind.StringLiteral */;
            // comments
            case 47 /* CharacterCodes.slash */: {
                // pos still points at the '/', i.e. pos === tokenOffset here, so the
                // comment text is always text.substring(tokenOffset, pos).
                // Single-line comment
                if (text.charCodeAt(pos + 1) === 47 /* CharacterCodes.slash */) {
                    pos += 2;
                    while (pos < len) {
                        if (isLineBreak(text.charCodeAt(pos))) {
                            break;
                        }
                        pos++;
                    }
                    value = text.substring(tokenOffset, pos);
                    return token = 12 /* SyntaxKind.LineCommentTrivia */;
                }
                // Multi-line comment
                if (text.charCodeAt(pos + 1) === 42 /* CharacterCodes.asterisk */) {
                    pos += 2;
                    let commentClosed = false;
                    // Scan to the very end: the pos + 1 lookahead yields NaN past the
                    // text, so it can never match, and an unterminated comment ends
                    // with pos === len (never beyond it).
                    while (pos < len) {
                        const ch = text.charCodeAt(pos);
                        if (ch === 42 /* CharacterCodes.asterisk */ && text.charCodeAt(pos + 1) === 47 /* CharacterCodes.slash */) {
                            pos += 2;
                            commentClosed = true;
                            break;
                        }
                        pos++;
                        if (isLineBreak(ch)) {
                            if (ch === 13 /* CharacterCodes.carriageReturn */ && text.charCodeAt(pos) === 10 /* CharacterCodes.lineFeed */) {
                                pos++;
                            }
                            // Keep line tracking correct for tokens following the comment.
                            lineNumber++;
                            tokenLineStartOffset = pos;
                        }
                    }
                    if (!commentClosed) {
                        scanError = 1 /* ScanError.UnexpectedEndOfComment */;
                    }
                    value = text.substring(tokenOffset, pos);
                    return token = 13 /* SyntaxKind.BlockCommentTrivia */;
                }
                // just a single slash
                value += String.fromCharCode(code);
                pos++;
                return token = 16 /* SyntaxKind.Unknown */;
            }
            // numbers
            case 45 /* CharacterCodes.minus */:
                value += String.fromCharCode(code);
                pos++;
                if (pos === len || !isDigit(text.charCodeAt(pos))) {
                    return token = 16 /* SyntaxKind.Unknown */;
                }
            // found a minus, followed by a number so
            // we fall through to proceed with scanning
            // numbers
            // eslint-disable-next-line no-fallthrough
            case 48 /* CharacterCodes._0 */:
            case 49 /* CharacterCodes._1 */:
            case 50 /* CharacterCodes._2 */:
            case 51 /* CharacterCodes._3 */:
            case 52 /* CharacterCodes._4 */:
            case 53 /* CharacterCodes._5 */:
            case 54 /* CharacterCodes._6 */:
            case 55 /* CharacterCodes._7 */:
            case 56 /* CharacterCodes._8 */:
            case 57 /* CharacterCodes._9 */:
                value += scanNumber();
                return token = 11 /* SyntaxKind.NumericLiteral */;
            // literals and unknown symbols
            default:
                // is a literal? Read the full word.
                while (pos < len && isUnknownContentCharacter(code)) {
                    pos++;
                    code = text.charCodeAt(pos);
                }
                if (tokenOffset !== pos) {
                    value = text.substring(tokenOffset, pos);
                    // keywords: true, false, null
                    switch (value) {
                        case 'true': return token = 8 /* SyntaxKind.TrueKeyword */;
                        case 'false': return token = 9 /* SyntaxKind.FalseKeyword */;
                        case 'null': return token = 7 /* SyntaxKind.NullKeyword */;
                    }
                    return token = 16 /* SyntaxKind.Unknown */;
                }
                // Nothing was consumed: emit the single character as an unknown token.
                value += String.fromCharCode(code);
                pos++;
                return token = 16 /* SyntaxKind.Unknown */;
        }
    }

    /**
     * Tells whether `code` may be part of a bare word (a keyword or unknown
     * content), i.e. it is not whitespace, a line break, a structural
     * character, a double quote or a slash.
     *
     * @param {number} code - UTF-16 code unit to classify.
     * @returns {boolean} True when the character continues a bare word.
     */
    function isUnknownContentCharacter(code) {
        if (isWhiteSpace(code) || isLineBreak(code)) {
            return false;
        }
        switch (code) {
            case 125 /* CharacterCodes.closeBrace */:
            case 93 /* CharacterCodes.closeBracket */:
            case 123 /* CharacterCodes.openBrace */:
            case 91 /* CharacterCodes.openBracket */:
            case 34 /* CharacterCodes.doubleQuote */:
            case 58 /* CharacterCodes.colon */:
            case 44 /* CharacterCodes.comma */:
            case 47 /* CharacterCodes.slash */:
                return false;
        }
        return true;
    }

    /**
     * Scans tokens until one is found that is not a comment, whitespace or
     * line break.
     *
     * @returns {number} The kind of the first non-trivia token.
     */
    function scanNextNonTrivia() {
        let result;
        do {
            result = scanNext();
        } while (result >= 12 /* SyntaxKind.LineCommentTrivia */ && result <= 15 /* SyntaxKind.Trivia */);
        return result;
    }

    return {
        setPosition: setPosition,
        getPosition: () => pos,
        scan: ignoreTrivia ? scanNextNonTrivia : scanNext,
        getToken: () => token,
        getTokenValue: () => value,
        getTokenOffset: () => tokenOffset,
        getTokenLength: () => pos - tokenOffset,
        getTokenStartLine: () => lineStartOffset,
        getTokenStartCharacter: () => tokenOffset - prevTokenLineStartOffset,
        getTokenError: () => scanError,
    };
}
|
||
|
/**
 * Tells whether a character code is a space or a horizontal tab.
 * Line feed and carriage return are deliberately not matched here.
 *
 * @param {number} ch - UTF-16 code unit (as returned by `String.prototype.charCodeAt`).
 * @returns {boolean} True for space (32) or tab (9).
 */
function isWhiteSpace(ch) {
    return ch === 32 /* CharacterCodes.space */ || ch === 9 /* CharacterCodes.tab */;
}
|
||
|
/**
 * Tells whether a character code is a line feed or a carriage return.
 *
 * @param {number} ch - UTF-16 code unit (as returned by `String.prototype.charCodeAt`).
 * @returns {boolean} True for line feed (10) or carriage return (13).
 */
function isLineBreak(ch) {
    return ch === 10 /* CharacterCodes.lineFeed */ || ch === 13 /* CharacterCodes.carriageReturn */;
}
|
||
|
/**
 * Tells whether a character code is an ASCII decimal digit.
 *
 * @param {number} ch - UTF-16 code unit (as returned by `String.prototype.charCodeAt`).
 * @returns {boolean} True for codes 48 ('0') through 57 ('9'); false otherwise,
 *   including for NaN.
 */
function isDigit(ch) {
    return ch >= 48 /* CharacterCodes._0 */ && ch <= 57 /* CharacterCodes._9 */;
}
|
||
|
/**
 * Character code lookup table in the shape of a compiled TypeScript enum:
 * every name maps to its UTF-16 code and every code maps back to its name
 * (e.g. `CharacterCodes.slash === 47` and `CharacterCodes[47] === 'slash'`).
 */
// `var` plus the `CharacterCodes || (CharacterCodes = {})` argument are kept
// so the declaration still merges with any other declaration of the same name.
var CharacterCodes;
(function (CharacterCodes) {
    // Listed in the original declaration order.
    const entries = [
        ['lineFeed', 10],
        ['carriageReturn', 13],
        ['space', 32],
        ['_0', 48], ['_1', 49], ['_2', 50], ['_3', 51], ['_4', 52],
        ['_5', 53], ['_6', 54], ['_7', 55], ['_8', 56], ['_9', 57],
        ['a', 97], ['b', 98], ['c', 99], ['d', 100], ['e', 101], ['f', 102], ['g', 103],
        ['h', 104], ['i', 105], ['j', 106], ['k', 107], ['l', 108], ['m', 109], ['n', 110],
        ['o', 111], ['p', 112], ['q', 113], ['r', 114], ['s', 115], ['t', 116], ['u', 117],
        ['v', 118], ['w', 119], ['x', 120], ['y', 121], ['z', 122],
        ['A', 65], ['B', 66], ['C', 67], ['D', 68], ['E', 69], ['F', 70], ['G', 71],
        ['H', 72], ['I', 73], ['J', 74], ['K', 75], ['L', 76], ['M', 77], ['N', 78],
        ['O', 79], ['P', 80], ['Q', 81], ['R', 82], ['S', 83], ['T', 84], ['U', 85],
        ['V', 86], ['W', 87], ['X', 88], ['Y', 89], ['Z', 90],
        ['asterisk', 42],
        ['backslash', 92],
        ['closeBrace', 125],
        ['closeBracket', 93],
        ['colon', 58],
        ['comma', 44],
        ['dot', 46],
        ['doubleQuote', 34],
        ['minus', 45],
        ['openBrace', 123],
        ['openBracket', 91],
        ['plus', 43],
        ['slash', 47],
        ['formFeed', 12],
        ['tab', 9],
    ];
    for (const [name, code] of entries) {
        // Forward (name -> code) and reverse (code -> name) mapping in one step.
        CharacterCodes[CharacterCodes[name] = code] = name;
    }
})(CharacterCodes || (CharacterCodes = {}));
|