// Copyright 2022 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

import assert from 'assert'; // node's assert is CommonJS, hence the default import
import * as path from 'path';
import * as fs from 'fs/promises';
import { describe, it, before } from 'mocha';
import * as tm from 'vscode-textmate';
import * as onig from 'vscode-oniguruma';

/**
 * initializes a textmate grammar registry that loads files of the form
 * `${kind}.tmLanguage.json` from the given dir
 */
async function initRegistry(grammarDir: string): Promise<tm.Registry> {
  const onigWasmPath = require.resolve('vscode-oniguruma').replace(/main\.js$/, 'onig.wasm');
  const onigWasmRaw = await fs.readFile(onigWasmPath);
  // the Registry expects onigLib as a *promise* for the library, so resolve it
  // once the wasm has finished loading
  const onigLib = onig.loadWASM(onigWasmRaw).then(() => ({
    createOnigScanner(patterns: string[]) { return new onig.OnigScanner(patterns); },
    createOnigString(s: string) { return new onig.OnigString(s); }
  }));

  return new tm.Registry({
    onigLib,
    loadGrammar: async (scopeName) => {
      // a scope name is usually of the form "source.${ext}"; it's just
      // textmate's (admittedly odd) generic way of naming a file type
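      // e.g. 'source.fidl' maps to `${grammarDir}/fidl.tmLanguage.json` below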
      const parts = scopeName.match(/^source\.(.+)$/);
      if (!parts) {
        // unknown scope, not source.ext
        console.error('unknown grammar requested, scope not in source.ext form', scopeName);
        return null;
      }
      const [_source, ext] = parts;
      const grammarFile = path.join(grammarDir, `${ext}.tmLanguage.json`);
      let contents;
      try {
        contents = await fs.readFile(grammarFile, 'utf8');
      } catch (ex) {
        // not a known grammar, couldn't read
        throw new Error(`unknown grammar ${ext} (${grammarFile}) requested: ${ex}`);
      }

      // the filename tells parseRawGrammar that the contents are JSON, not a plist
      return await tm.parseRawGrammar(contents, `${ext}.tmLanguage.json`);
    }
  });
}

/**
 * a single tokenized line (with the raw line + tokens)
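 * e.g. (contents and scopes illustrative):
 *   { contents: 'const x;', tokens: [{ startIndex: 0, endIndex: 5, scopes: ['source.fidl'] }] }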
 */
interface TokenizedLine {
  contents: string;
  tokens: tm.IToken[];
}

/**
 * break a source file down into a set of tokenized lines based on the given grammar
 */
function tokenize(code: string, grammar: tm.IGrammar): TokenizedLine[] {
  const lines = code.split(/\r?\n/);
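  // ruleStack threads tokenizer state from each line into the next, so that
  // multi-line constructs (e.g. block comments) tokenize correctly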
  let ruleStack = tm.INITIAL;
  return lines.map((contents) => {
    const toks = grammar.tokenizeLine(contents, ruleStack);
    ruleStack = toks.ruleStack;
    return { contents, tokens: toks.tokens };
  });
}

/**
 * parse a vscode-tmgrammar-test snapshot into the same kind of tokenized
 * lines as returned by #tokenize
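 *
 * e.g. the snapshot lines (scope illustrative)
 *   >const x;
 *   #^^^^^ some.scope
 * parse to a single TokenizedLine whose one token covers 'const'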
 */
function parseSnapshot(snapshot: string): TokenizedLine[] {
  const lines = snapshot.split(/\r?\n/);
  if (lines.length < 1) {
    throw new Error('empty snapshot');
  }
  const parsedLines: TokenizedLine[] = [];
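  // split a snapshot line into its leading indicator character and the rest,
  // e.g. '>foo' -> ['>', 'foo']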
  const splitLine = (line: string): ['#' | '>', string] => {
    if (line.length < 1) { throw new Error('empty line in snapshot'); }
    const [indicator, contents] = [line[0], line.slice(1)];
    if (indicator !== '#' && indicator !== '>') {
      throw new Error(`unknown snapshot line start character '${indicator}'`);
    }
    return [indicator, contents];
  };

  const [firstInd, firstContents] = splitLine(lines.shift()!);
  if (firstInd !== '>') {
    throw new Error('first line of snapshot was not a source line');
  }

  let lastLine: TokenizedLine = {
    contents: firstContents,
    tokens: [],
  };
  for (const line of lines) {
    // the format seems to be (this parser treats it as a black box for now)
    // that lines either start with `>` to indicate source lines, or `#` to
    // indicate token spans

    const [indicator, contents] = splitLine(line);

    switch (indicator) {
      case '#': {
        // more tokens for the last line
        // token lines go /^\s*(?<span>\^+) (?<kind>.+)$/
        // where `span` indicates the portion of text that the token covers,
        // and `kind` indicates the token type.
        // `kind` is made up of space-separated scopes
        //
        // (the format is meant to be human readable)

        // split out the raw parts
        const splitOut = contents.match(/^(\s*)(\^+) (.+)$/);
        if (!splitOut) {
          throw new Error(`malformed token line '${contents}' in snapshot`);
        }
        const [_full, ws, spanRaw, scopesRaw] = splitOut;

        // convert them to textmate tokens
        // the start index is simply the amount of whitespace before the first
        // caret, and the end index is that plus the length of the caret run
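        // e.g. '  ^^^ some.scope' -> startIndex = 2, endIndex = 5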
        const [startIndex, endIndex] = [ws.length, ws.length + spanRaw.length];
        lastLine.tokens.push({
          startIndex, endIndex,
          scopes: scopesRaw.split(' '),
        });
        break;
      }
      case '>':
        // we've got a new source line, push the last one
        // (we pre-push the first source line, so this is guaranteed to be *something*)
        parsedLines.push(lastLine);
        lastLine = {
          contents,
          tokens: [],
        };
        break;
    }
  }
  // make sure we push the last line, since there's no following source line
  // to trigger a push
  parsedLines.push(lastLine);

  return parsedLines;
}

/**
 * run a single snapshot test, comparing sourceFile to `${sourceFile}.snap` via
 * the given grammar
 */
async function snapTest(sourceFile: string, grammar: tm.IGrammar): Promise<void> {
  const source = await fs.readFile(sourceFile, 'utf8');
  const snap = await fs.readFile(`${sourceFile}.snap`, 'utf8');

  const expected = parseSnapshot(snap);
  const actual = tokenize(source, grammar).map((line) => {
    // vscode-tmgrammar-snap skips generating tokens in snapshots for "empty"
    // (empty or whitespace-only) lines, for reasons unbeknownst to anyone, so
    // we need to drop the tokens for those lines here (they'd normally have a
    // root scope attached)
    if (line.contents.trim().length === 0) {
      line.tokens = [];
    }
    return line;
  });

  assert.deepStrictEqual(actual, expected);
}

// Dynamically create the syntax tests
const setupTests = (async () => {
  const grammarDir = path.join(__dirname, '..', 'dist', 'resources');
  const rootSnapDir = path.join(__dirname, '..', 'src', 'test', 'suite', 'snap');
  const kinds = ['fidl', 'cml'];

  const snapFiles: Map<string, [string, string][]> = new Map();

  for (const kind of kinds) {
    const snapDir = path.join(rootSnapDir, kind);
    // filter out the `.snap` files, so we're only left with the base input files
    const files: [string, string][] = (await fs.readdir(snapDir))
      .filter((file) => path.extname(file) === `.${kind}`)
      .map((file) => [file, path.join(snapDir, file)]);
    snapFiles.set(kind, files);
  }

  describe('syntax highlighting', function () {
    let registry: tm.Registry;
    before(async function () {
      registry = await initRegistry(grammarDir);
    });

    for (const kind of kinds) {
      describe(`for ${kind}`, function () {
        let grammar: tm.IGrammar;
        before(async function () {
          const maybeGrammar = await registry.loadGrammar(`source.${kind}`);
          if (!maybeGrammar) {
            throw new Error(`couldn't load grammar for ${kind}`);
          }
          grammar = maybeGrammar;
        });

        for (const [file, fullPath] of snapFiles.get(kind)! /* not undefined b/c above */) {
          it(`should highlight ${path.basename(file)}`, async function () {
            await snapTest(fullPath, grammar);
          });
        }
      });
    }
  });
})(); // no top-level await here, so an IIFE it is

| // use a "fake" test to get the async code to run synchronously |
| // and surface errors as failures |
| describe('syntax test setup', function () { |
| it('will load all the tests [ignore this]', async function () { |
| await setupTests; |
| }); |
| }); |