// Copyright 2022 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
import assert from 'assert'; // Node built-in; CommonJS, not ESM
import * as path from 'path';
import * as fs from 'fs/promises';
import { describe, it, before } from 'mocha';
import * as tm from 'vscode-textmate';
import * as onig from 'vscode-oniguruma';
/**
* initializes a textmate grammar registry that loads files of the form
* `${kind}.tmLanguage.json` from the given dir
*/
async function initRegistry(grammarDir: string): Promise<tm.Registry> {
const onigWasmPath = require.resolve('vscode-oniguruma').replace(/main\.js$/, 'onig.wasm');
const onigWasmRaw = await fs.readFile(onigWasmPath);
// the Registry constructor takes a *promise* for the onig lib, so no await is needed here
const onigLib = onig.loadWASM(onigWasmRaw).then(() => ({
createOnigScanner(patterns: string[]) { return new onig.OnigScanner(patterns); },
createOnigString(s: string) { return new onig.OnigString(s); }
}));
return new tm.Registry({
onigLib,
loadGrammar: async (scopeName) => {
// a scope name is usually of the form `source.${ext}`;
// it's basically just TextMate's generic way of naming a file type
// (a quirk of the format)
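// e.g. a request for 'source.fidl' resolves to `${grammarDir}/fidl.tmLanguage.json` below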
const parts = scopeName.match(/^source\.(.+)$/);
if (!parts) {
// unknown scope, not source.ext
console.error('unknown grammar requested, scope not in source.ext form', scopeName);
return null;
}
const [_full, ext] = parts;
const grammarFile = path.join(grammarDir, `${ext}.tmLanguage.json`);
let contents;
try {
contents = await fs.readFile(grammarFile, 'utf8');
} catch (ex) {
// not a known grammar, couldn't read
throw new Error(`unknown grammar ${ext} (${grammarFile}) requested: ${ex}`);
}
// need the filename to indicate that this is JSON, not a plist
return await tm.parseRawGrammar(contents, `${ext}.tmLanguage.json`);
}
});
}
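// for illustration, typical usage of initRegistry (the grammar dir path is hypothetical):
//
//   const registry = await initRegistry('/path/to/grammars');
//   const grammar = await registry.loadGrammar('source.fidl'); // IGrammar | null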
/**
* a single tokenized line (with the raw line + tokens)
*/
interface TokenizedLine {
contents: string;
tokens: tm.IToken[];
}
/**
* break a source file down into a set of tokenized lines based on the given grammar
*/
function tokenize(code: string, grammar: tm.IGrammar): TokenizedLine[] {
const lines = code.split(/\r?\n/);
let ruleStack = tm.INITIAL;
return lines.map((contents) => {
const toks = grammar.tokenizeLine(contents, ruleStack);
ruleStack = toks.ruleStack;
return { contents, tokens: toks.tokens };
});
}
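// note: the ruleStack threads tokenizer state from one line to the next,
// which is what lets multi-line constructs (e.g. block comments) tokenize correctly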
/**
* parse a vscode-tmgrammar-test snapshot into the same kind of tokenized
* lines as returned by #tokenize
*/
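// for illustration, a hypothetical two-token snapshot excerpt (scopes invented):
//
//   >const x = 1;
//   #^^^^^ keyword.other.fidl
//   #      ^ variable.other.fidl
//
// the `#` lines attach tokens to the preceding `>` source line: the first
// covers `const` (columns 0-5) and the second covers `x` (column 6)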
function parseSnapshot(snapshot: string): TokenizedLine[] {
const lines = snapshot.split(/\r?\n/);
if (lines.length < 1) {
throw new Error('empty snapshot');
}
const parsedLines: TokenizedLine[] = [];
const splitLine = (line: string): ['#' | '>', string] => {
if (line.length < 1) { throw new Error('empty line in snapshot'); }
const [indicator, contents] = [line[0], line.slice(1)];
if (indicator !== '#' && indicator !== '>') {
throw new Error(`unknown snapshot line start character '${indicator}'`);
}
return [indicator, contents];
};
let [firstInd, firstContents] = splitLine(lines.shift()!);
if (firstInd !== '>') {
throw new Error('first line of snapshot was not a source line');
}
let lastLine: TokenizedLine = {
contents: firstContents,
tokens: [],
};
for (const line of lines) {
// the format (reverse-engineered; this is a black box for now) seems
// to be that lines either start with `>` to indicate source lines, or
// `#` to indicate spans
const [indicator, contents] = splitLine(line);
switch (indicator) {
case '#': {
// more tokens for the last line
// token lines go /^(?<ws>\s*)(?<span>\^+) (?<kind>.+)$/
// where `span` indicates the portion of text that they cover,
// and `kind` indicates the token type.
// kind is made up of space-separated scopes
//
// it's human readable, more or less
// split out the raw parts
const splitOut = contents.match(/^(\s*)(\^+) (.+)$/);
if (!splitOut) {
throw new Error(`malformed token line '${contents}' in snapshot`);
}
const [_full, ws, spanRaw, scopesRaw] = splitOut;
// convert them to textmate tokens
// start index is simply the amount of whitespace before the first caret,
// and end index is that plus the length of the carets
let [startIndex, endIndex] = [ws.length, ws.length + spanRaw.length];
lastLine.tokens.push({
startIndex, endIndex,
scopes: scopesRaw.split(' '),
});
break;
}
case '>':
// we've got a new source line, push the last one
// (lastLine is seeded from the first source line above, so this is guaranteed to be *something*)
parsedLines.push(lastLine);
lastLine = {
contents,
tokens: [],
};
break;
}
}
// make sure we get the last line, since there's not another line to trigger a push
parsedLines.push(lastLine);
return parsedLines;
}
/**
* run a single snapshot test, comparing sourceFile to `${sourceFile}.snap` via
* the given grammar
*/
async function snapTest(sourceFile: string, grammar: tm.IGrammar): Promise<void> {
let source = await fs.readFile(sourceFile, 'utf8');
let snap = await fs.readFile(`${sourceFile}.snap`, 'utf8');
let expected = parseSnapshot(snap);
let actual = tokenize(source, grammar).map((line) => {
// for reasons unbeknownst to anyone, vscode-tmgrammar-snap skips generating
// tokenization in snapshots for "empty" (empty or whitespace-only) lines,
// meaning we need to remove tokens for those here (since they'll normally
// have a root scope attached)
if (line.contents.trim().length === 0) {
line.tokens = [];
}
return line;
});
assert.deepStrictEqual(actual, expected);
}
// Dynamically create the syntax tests
const setupTests = (async () => {
const grammarDir = path.join(__dirname, '..', 'dist', 'resources');
const rootSnapDir = path.join(__dirname, '..', 'src', 'test', 'suite', 'snap');
const kinds = ['fidl', 'cml'];
const snapFiles: Map<string, [string, string][]> = new Map();
for (const kind of kinds) {
const snapDir = path.join(rootSnapDir, kind);
// keep only the `.${kind}` input files (filtering out the `.snap` expectation files)
const files: [string, string][] = (await fs.readdir(snapDir))
.filter((file) => path.extname(file) === `.${kind}`)
.map((file) => [file, path.join(snapDir, file)]);
snapFiles.set(kind, files);
}
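// snapFiles now maps each kind to its [file, fullPath] pairs, e.g.
// (with a hypothetical input file):
//   'fidl' -> [['example.fidl', `${rootSnapDir}/fidl/example.fidl`], ...]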
describe('syntax highlighting', function () {
let registry: tm.Registry;
before(async function () {
registry = await initRegistry(grammarDir);
});
for (const kind of kinds) {
describe(`for ${kind}`, function () {
let grammar: tm.IGrammar;
before(async function () {
let maybeGrammar = await registry.loadGrammar(`source.${kind}`);
if (!maybeGrammar) {
throw new Error(`couldn't load grammar for ${kind}`);
}
grammar = maybeGrammar;
});
for (const [file, fullPath] of snapFiles.get(kind)! /* not undefined b/c above */) {
it(`should highlight ${path.basename(file)}`, async function () {
await snapTest(fullPath, grammar);
});
}
});
}
});
})(); // no top-level await available, so an IIFE it is
// use a "fake" test to get the async code to run synchronously
// and surface errors as failures
describe('syntax test setup', function () {
it('will load all the tests [ignore this]', async function () {
await setupTests;
});
});