// Copyright 2022 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
import assert from 'assert'; // Node built-in; CommonJS, not ESM
import * as path from 'path';
import * as fs from 'fs/promises';
import { describe, it, before } from 'mocha';
import * as tm from 'vscode-textmate';
import * as onig from 'vscode-oniguruma';
/**
* initializes a textmate grammar registry that loads files of the form
* `${kind}.tmLanguage.json` from the given dir
*/
async function initRegistry(grammarDir: string): Promise<tm.Registry> {
const onigWasmPath = require.resolve('vscode-oniguruma').replace(/main\.js$/, 'onig.wasm');
const onigWasmRaw = await fs.readFile(onigWasmPath);
// the Registry constructor takes a *promise* for the onig lib, so no await is needed here
const onigLib = onig.loadWASM(onigWasmRaw).then(() => ({
createOnigScanner(patterns: string[]) { return new onig.OnigScanner(patterns); },
createOnigString(s: string) { return new onig.OnigString(s); }
}));
return new tm.Registry({
onigLib,
loadGrammar: async (scopeName) => {
// a scope name is usually of the form `source.${ext}`;
// it's basically just TextMate's generic way of naming a file type
// (a quirk of the format)
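// e.g. a request for 'source.fidl' resolves to `${grammarDir}/fidl.tmLanguage.json` below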
const parts = scopeName.match(/^source\.(.+)$/);
if (!parts) {
// unknown scope, not source.ext
console.error('unknown grammar requested, scope not in source.ext form', scopeName);
return null;
}
const [_full, ext] = parts;
const grammarFile = path.join(grammarDir, `${ext}.tmLanguage.json`);
let contents;
try {
contents = await fs.readFile(grammarFile, 'utf8');
} catch (ex) {
// not a known grammar, couldn't read
throw new Error(`unknown grammar ${ext} (${grammarFile}) requested: ${ex}`);
}
// need the filename to indicate that this is JSON, not a plist
return await tm.parseRawGrammar(contents, `${ext}.tmLanguage.json`);
}
});
}
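// for illustration, typical usage of initRegistry (the grammar dir path is hypothetical):
//
//   const registry = await initRegistry('/path/to/grammars');
//   const grammar = await registry.loadGrammar('source.fidl'); // IGrammar | null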
/**
* a single tokenized line (with the raw line + tokens)
*/
interface TokenizedLine {
contents: string;
tokens: tm.IToken[];
}
/**
* break a source file down into a set of tokenized lines based on the given grammar
*/
function tokenize(code: string, grammar: tm.IGrammar): TokenizedLine[] {
const lines = code.split(/\r?\n/);
let ruleStack = tm.INITIAL;
return lines.map((contents) => {
const toks = grammar.tokenizeLine(contents, ruleStack);
ruleStack = toks.ruleStack;
return { contents, tokens: toks.tokens };
});
}
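// note: the ruleStack threads tokenizer state from one line to the next,
// which is what lets multi-line constructs (e.g. block comments) tokenize correctly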
/**
* parse a vscode-tmgrammar-test snapshot into the same kind of tokenized
* lines as returned by #tokenize
*/
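// for illustration, a hypothetical two-token snapshot excerpt (scopes invented):
//
//   >const x = 1;
//   #^^^^^ keyword.other.fidl
//   #      ^ variable.other.fidl
//
// the `#` lines attach tokens to the preceding `>` source line: the first
// covers `const` (columns 0-5) and the second covers `x` (column 6)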
function parseSnapshot(snapshot: string): TokenizedLine[] {
const lines = snapshot.split(/\r?\n/);
if (lines.length < 1) {
throw new Error('empty snapshot');
}
const parsedLines: TokenizedLine[] = [];
const splitLine = (line: string): ['#' | '>', string] => {
if (line.length < 1) { throw new Error('empty line in snapshot'); }
const [indicator, contents] = [line[0], line.slice(1)];
if (indicator !== '#' && indicator !== '>') {
throw new Error(`unknown snapshot line start character '${indicator}'`);
}
return [indicator, contents];
};
let [firstInd, firstContents] = splitLine(lines.shift()!);
if (firstInd !== '>') {
throw new Error('first line of snapshot was not a source line');
}
let lastLine: TokenizedLine = {
contents: firstContents,
tokens: [],
};
for (const line of lines) {
// the format (reverse-engineered; this is a black box for now) seems
// to be that lines either start with `>` to indicate source lines, or
// `#` to indicate spans
const [indicator, contents] = splitLine(line);
switch (indicator) {
case '#': {
// more tokens for the last line
// token lines go /^(?<ws>\s*)(?<span>\^+) (?<kind>.+)$/
// where `span` indicates the portion of text that they cover,
// and `kind` indicates the token type.
// kind is made up of space-separated scopes
//
// it's human readable, more or less
// split out the raw parts
const splitOut = contents.match(/^(\s*)(\^+) (.+)$/);
if (!splitOut) {
throw new Error(`malformed token line '${contents}' in snapshot`);
}
const [_full, ws, spanRaw, scopesRaw] = splitOut;
// convert them to textmate tokens
// start index is simply the amount of whitespace before the first caret,
// and end index is that plus the length of the carets
let [startIndex, endIndex] = [ws.length, ws.length + spanRaw.length];
lastLine.tokens.push({
startIndex, endIndex,
scopes: scopesRaw.split(' '),
});
break;
}
case '>':
// we've got a new source line, push the last one
// (lastLine is seeded from the first source line above, so this is guaranteed to be *something*)
parsedLines.push(lastLine);
lastLine = {
contents,
tokens: [],
};
break;
}
}
// make sure we get the last line, since there's not another line to trigger a push
parsedLines.push(lastLine);
return parsedLines;
}
/**
* run a single snapshot test, comparing sourceFile to `${sourceFile}.snap` via
* the given grammar
*/
async function snapTest(sourceFile: string, grammar: tm.IGrammar): Promise<void> {
let source = await fs.readFile(sourceFile, 'utf8');
let snap = await fs.readFile(`${sourceFile}.snap`, 'utf8');
let expected = parseSnapshot(snap);
let actual = tokenize(source, grammar).map((line) => {
// for reasons unbeknownst to anyone, vscode-tmgrammar-snap skips generating
// tokenization in snapshots for "empty" (empty or whitespace-only) lines,
// meaning we need to remove tokens for those here (since they'll normally
// have a root scope attached)
if (line.contents.trim().length === 0) {
line.tokens = [];
}
return line;
});
assert.deepStrictEqual(actual, expected);
}
// Dynamically create the syntax tests
const setupTests = (async () => {
const grammarDir = path.join(__dirname, '..', 'dist', 'resources');
const rootSnapDir = path.join(__dirname, '..', 'src', 'test', 'suite', 'snap');
const kinds = ['fidl', 'cml'];
const snapFiles: Map<string, [string, string][]> = new Map();
for (const kind of kinds) {
const snapDir = path.join(rootSnapDir, kind);
// keep only the `.${kind}` input files (filtering out the `.snap` expectation files)
const files: [string, string][] = (await fs.readdir(snapDir))
.filter((file) => path.extname(file) === `.${kind}`)
.map((file) => [file, path.join(snapDir, file)]);
snapFiles.set(kind, files);
}
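// snapFiles now maps each kind to its [file, fullPath] pairs, e.g.
// (with a hypothetical input file):
//   'fidl' -> [['example.fidl', `${rootSnapDir}/fidl/example.fidl`], ...]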
describe('syntax highlighting', function () {
let registry: tm.Registry;
before(async function () {
registry = await initRegistry(grammarDir);
});
for (const kind of kinds) {
describe(`for ${kind}`, function () {
let grammar: tm.IGrammar;
before(async function () {
let maybeGrammar = await registry.loadGrammar(`source.${kind}`);
if (!maybeGrammar) {
throw new Error(`couldn't load grammar for ${kind}`);
}
grammar = maybeGrammar;
});
for (const [file, fullPath] of snapFiles.get(kind)! /* not undefined b/c above */) {
it(`should highlight ${path.basename(file)}`, async function () {
await snapTest(fullPath, grammar);
});
}
});
}
});
})(); // no top-level await available, so an IIFE it is
// use a "fake" test to get the async code to run synchronously
// and surface errors as failures
describe('syntax test setup', function () {
it('will load all the tests [ignore this]', async function () {
await setupTests;
});
});