allow selecting `xxhash64` as hashFunction

This commit is contained in:
Tobias Koppers 2021-09-23 12:55:18 +02:00
parent 4a8bf4c084
commit 3f142d9cd6
10 changed files with 461 additions and 2 deletions

2
.gitignore vendored
View File

@ -7,6 +7,8 @@
/benchmark/js
/benchmark/fixtures
/examples/**/dist
/assembly/**/*.wat
/assembly/**/*.wasm
/coverage
/.nyc_output
/.jest-cache

View File

@ -0,0 +1,131 @@
// //////////////////////////////////////////////////////////
// xxhash64.h
// Copyright (c) 2016 Stephan Brumme. All rights reserved.
// see http://create.stephan-brumme.com/disclaimer.html
//
// XXHash (64 bit), based on Yann Collet's descriptions, see
// http://cyan4973.github.io/xxHash/
//
// Modified for hash-wasm by Dani Biró
//
// Ported to assemblyscript by Tobias Koppers
// Modifications:
// - seed is always 0
// - update is only called with a multiple of 32
// - final takes the remaining 0 - 31 bytes
//
// Multiplicative constants used by the xxHash64 mixing steps below
const Prime1: u64 = 11400714785074694791;
const Prime2: u64 = 14029467366897019727;
const Prime3: u64 = 1609587929392839161;
const Prime4: u64 = 9650029242287828579;
const Prime5: u64 = 2870177450012600261;
// Running state of the hash in progress: four 64 bit lanes,
// (re)initialized by init() and advanced by update()
let state0: u64;
let state1: u64;
let state2: u64;
let state3: u64;
// Number of bytes consumed by update() since the last init()
let totalLength: u64;
// One mixing round for a single lane: adds `input * Prime2` to `previous`,
// rotates the sum left by 31 bits and multiplies it by Prime1.
// Returns the new lane value.
function processSingle(previous: u64, input: u64): u64 {
return rotl(previous + input * Prime2, 31) * Prime1;
}
// Starts a new hash: resets the four lanes to their initial values for
// seed 0 (the seed is not configurable in this port) and clears the
// byte counter.
export function init(): void {
state0 = Prime1 + Prime2;
state1 = Prime2;
state2 = 0;
// unsigned wrap-around is intended here
state3 = 0 - Prime1;
totalLength = 0;
}
// Consumes `length` bytes of input that the host has placed at the start of
// linear memory (offset 0).
// `length` is expected to be a multiple of 32 (see the file header): the loop
// always reads whole 32 byte stripes, 8 bytes per lane.
export function update(length: u32): void {
// the do/while below would otherwise process one stripe even for no input
if (length == 0) return;
totalLength += length;
let dataPtr: u32 = 0;
// work on local copies of the lanes and write them back once at the end
let s0 = state0;
let s1 = state1;
let s2 = state2;
let s3 = state3;
do {
s0 = processSingle(s0, load<u64>(dataPtr));
s1 = processSingle(s1, load<u64>(dataPtr + 8));
s2 = processSingle(s2, load<u64>(dataPtr + 16));
s3 = processSingle(s3, load<u64>(dataPtr + 24));
dataPtr += 32;
} while (dataPtr < length);
state0 = s0;
state1 = s1;
state2 = s2;
state3 = s3;
}
// Finishes the hash.
// `length` is the number of remaining input bytes (0 - 31, see the file
// header) that the host has placed at the start of linear memory.
// The result is written to memory offset 0 as 16 ASCII hex characters:
// bytes 0-7 hold the upper 32 bits of the hash, bytes 8-15 the lower 32 bits.
export function final(length: u32): void {
// fold 256 bit state into one single 64 bit value
let result: u64;
if (totalLength > 0) {
result =
rotl(state0, 1) + rotl(state1, 7) + rotl(state2, 12) + rotl(state3, 18);
result = (result ^ processSingle(0, state0)) * Prime1 + Prime4;
result = (result ^ processSingle(0, state1)) * Prime1 + Prime4;
result = (result ^ processSingle(0, state2)) * Prime1 + Prime4;
result = (result ^ processSingle(0, state3)) * Prime1 + Prime4;
} else {
// update() never consumed a full stripe, the lanes are unused
result = Prime5;
}
// mix in the total input size: bytes seen by update() plus the tail
result += totalLength + length;
let dataPtr: u32 = 0;
// at least 8 bytes left ? => eat 8 bytes per step
for (; dataPtr + 8 <= length; dataPtr += 8) {
result =
rotl(result ^ processSingle(0, load<u64>(dataPtr)), 27) * Prime1 + Prime4;
}
// 4 bytes left ? => eat those
if (dataPtr + 4 <= length) {
result = rotl(result ^ (load<u32>(dataPtr) * Prime1), 23) * Prime2 + Prime3;
dataPtr += 4;
}
// take care of remaining 0..3 bytes, eat 1 byte per step
while (dataPtr !== length) {
result = rotl(result ^ (load<u8>(dataPtr) * Prime5), 11) * Prime1;
dataPtr++;
}
// mix bits
result ^= result >> 33;
result *= Prime2;
result ^= result >> 29;
result *= Prime3;
result ^= result >> 32;
// NOTE(review): this store is overwritten by the next statement, which
// writes to the same address; it looks redundant. Removing it requires
// regenerating the embedded wasm binary.
store<u64>(0, result);
// hex encode the upper and the lower half, 8 characters each
store<u64>(0, u32ToHex(result >> 32));
store<u64>(8, u32ToHex(result & 0xffffffff));
}
// Converts the low 32 bits of `x` into eight lowercase ASCII hex digits
// without branches or lookup tables.
// The digits are returned packed into a u64, one per byte, with the most
// significant digit in the lowest byte, so a little-endian 64 bit store
// writes them in reading order.
function u32ToHex(x: u64): u64 {
// from https://johnnylee-sde.github.io/Fast-unsigned-integer-to-hex-string/
// spread the 8 nibbles over the 8 bytes: first swap/spread the 16 bit halves,
// then the bytes, then the nibbles
x = ((x & 0xffff) << 32) | ((x & 0xffff0000) >> 16);
x = ((x & 0x0000ff000000ff00) >> 8) | ((x & 0x000000ff000000ff) << 16);
x = ((x & 0x00f000f000f000f0) >> 4) | ((x & 0x000f000f000f000f) << 8);
// mask has a 1 in every byte whose nibble is >= 10 (adding 6 carries into bit 4)
const mask = ((x + 0x0606060606060606) >> 4) & 0x0101010101010101;
// 0x30 is "0"; values >= 10 get another 0x27 to land on "a".."f"
x |= 0x3030303030303030;
x += 0x27 * mask;
return x;
}

6
assembly/tsconfig.json Normal file
View File

@ -0,0 +1,6 @@
{
"extends": "assemblyscript/std/assembly.json",
"include": [
"./**/*.asm.ts"
]
}

View File

@ -190,6 +190,7 @@
"hashbang",
"webassemblyjs",
"assemblyscript",
"fsevents",
"watchpack",
"tapable",
@ -201,6 +202,7 @@
"MCEP",
"traceur",
"atlaskit",
"xxhash",
"xxhashjs",
"systemjs",
"skypack",

View File

@ -125,6 +125,7 @@ class DebugHash extends Hash {
}
let crypto = undefined;
let createXXHash64 = undefined;
/**
* Creates a hash by name or function
@ -139,6 +140,10 @@ module.exports = algorithm => {
// TODO add non-cryptographic algorithm here
case "debug":
return new DebugHash();
case "xxhash64":
if (createXXHash64 === undefined)
createXXHash64 = require("./hash/xxhash64");
return createXXHash64();
default:
if (crypto === undefined) crypto = require("crypto");
return new BulkUpdateDecorator(

128
lib/util/hash/xxhash64.js Normal file
View File

@ -0,0 +1,128 @@
/*
MIT License http://www.opensource.org/licenses/mit-license.php
Author Tobias Koppers @sokra
*/
"use strict";
//#region wasm code: xxhash64 (../../../assembly/hash/xxhash64.asm.ts) --initialMemory 1
const xxhash64 = new WebAssembly.Module(
Buffer.from(
// 1180 bytes
"AGFzbQEAAAABCAJgAX8AYAAAAwQDAQAABQMBAAEGGgV+AUIAC34BQgALfgFCAAt+AUIAC34BQgALByIEBGluaXQAAAZ1cGRhdGUAAQVmaW5hbAACBm1lbW9yeQIACrwIAzAAQtbrgu7q/Yn14AAkAELP1tO+0ser2UIkAUIAJAJC+erQ0OfJoeThACQDQgAkBAvUAQIBfwR+IABFBEAPCyMEIACtfCQEIwAhAiMBIQMjAiEEIwMhBQNAIAIgASkDAELP1tO+0ser2UJ+fEIfiUKHla+vmLbem55/fiECIAMgASkDCELP1tO+0ser2UJ+fEIfiUKHla+vmLbem55/fiEDIAQgASkDEELP1tO+0ser2UJ+fEIfiUKHla+vmLbem55/fiEEIAUgASkDGELP1tO+0ser2UJ+fEIfiUKHla+vmLbem55/fiEFIAAgAUEgaiIBSw0ACyACJAAgAyQBIAQkAiAFJAMLsgYCAX8EfiMEQgBSBH4jACICQgGJIwEiA0IHiXwjAiIEQgyJfCMDIgVCEol8IAJCz9bTvtLHq9lCfkIfiUKHla+vmLbem55/foVCh5Wvr5i23puef35CnaO16oOxjYr6AH0gA0LP1tO+0ser2UJ+Qh+JQoeVr6+Ytt6bnn9+hUKHla+vmLbem55/fkKdo7Xqg7GNivoAfSAEQs/W077Sx6vZQn5CH4lCh5Wvr5i23puef36FQoeVr6+Ytt6bnn9+Qp2jteqDsY2K+gB9IAVCz9bTvtLHq9lCfkIfiUKHla+vmLbem55/foVCh5Wvr5i23puef35CnaO16oOxjYr6AH0FQsXP2bLx5brqJwsjBCAArXx8IQIDQCABQQhqIABNBEAgAiABKQMAQs/W077Sx6vZQn5CH4lCh5Wvr5i23puef36FQhuJQoeVr6+Ytt6bnn9+Qp2jteqDsY2K+gB9IQIgAUEIaiEBDAELCyABQQRqIABNBEACfyACIAE1AgBCh5Wvr5i23puef36FQheJQs/W077Sx6vZQn5C+fPd8Zn2masWfCECIAFBBGoLIQELA0AgACABRwRAIAIgATEAAELFz9my8eW66id+hUILiUKHla+vmLbem55/fiECIAFBAWohAQwBCwtBACACIAJCIYiFQs/W077Sx6vZQn4iAiACQh2IhUL5893xmfaZqxZ+IgIgAkIgiIUiAjcDAEEAIAJCIIgiA0L//wODQiCGIANCgID8/w+DQhCIhCIDQv+BgIDwH4NCEIYgA0KA/oOAgOA/g0IIiIQiA0KPgLyA8IHAB4NCCIYgA0LwgcCHgJ6A+ACDQgSIhCIDQoaMmLDgwIGDBnxCBIhCgYKEiJCgwIABg0InfiADQrDgwIGDhoyYMIR8NwMAQQggAkL/////D4MiAkL//wODQiCGIAJCgID8/w+DQhCIhCICQv+BgIDwH4NCEIYgAkKA/oOAgOA/g0IIiIQiAkKPgLyA8IHAB4NCCIYgAkLwgcCHgJ6A+ACDQgSIhCICQoaMmLDgwIGDBnxCBIhCgYKEiJCgwIABg0InfiACQrDgwIGDhoyYMIR8NwMACw==",
"base64"
)
);
//#endregion
/**
 * Streaming xxhash64 implementation backed by a WebAssembly instance.
 * Input is staged in the first 64 KiB of the wasm memory. The wasm code
 * consumes it in multiples of 32 bytes, and the remaining 0 - 31 bytes stay
 * buffered at the start of the memory until more data arrives or the hash is
 * finished.
 */
class XxHash64 {
	/**
	 * @param {WebAssembly.Instance} instance wasm instance
	 */
	constructor(instance) {
		const exports = /** @type {any} */ (instance.exports);
		exports.init();
		this.instance = instance;
		this.exports = exports;
		// view on the first 64 KiB of the wasm memory, used as staging area
		this.mem = Buffer.from(exports.memory.buffer, 0, 65536);
		// number of not yet hashed bytes staged at the start of `mem`
		this.buffered = 0;
	}

	/**
	 * Prepares the instance for computing a new hash
	 * @returns {void}
	 */
	reset() {
		this.buffered = 0;
		this.exports.init();
	}

	/**
	 * @param {Buffer | string} data data
	 * @param {BufferEncoding=} encoding encoding
	 * @returns {this} itself
	 */
	update(data, encoding) {
		if (typeof data === "string") {
			// A string is only encoded directly into the wasm memory when it is
			// guaranteed to fit behind the bytes that are already buffered,
			// because `Buffer#write` silently truncates data that doesn't fit.
			// No encoding produces more than 3 bytes per UTF-16 code unit.
			if (this.buffered + data.length * 3 <= 65536) {
				this._updateWithShortString(data, encoding);
				return this;
			}
			data = Buffer.from(data, encoding);
		}
		this._updateWithBuffer(data);
		return this;
	}

	/**
	 * Encodes the string directly into the wasm memory.
	 * The caller must make sure that the encoded string fits into the memory
	 * behind the buffered bytes.
	 * @param {string} data data
	 * @param {BufferEncoding=} encoding encoding
	 * @returns {void}
	 */
	_updateWithShortString(data, encoding) {
		const { exports, buffered, mem } = this;
		const length = mem.write(data, buffered, encoding);
		if (buffered + length < 32) {
			// not enough for a full 32 byte block, keep buffering
			this.buffered += length;
		} else {
			// hash the largest multiple of 32 bytes
			const l = ((buffered + length) >> 5) << 5;
			exports.update(l);
			const newBuffered = length + buffered - l;
			this.buffered = newBuffered;
			// move the unhashed tail to the start of the memory
			if (newBuffered > 0) mem.copyWithin(0, l, buffered + length);
		}
	}

	/**
	 * @param {Buffer} data data
	 * @returns {void}
	 */
	_updateWithBuffer(data) {
		const { exports, buffered, mem } = this;
		const length = data.length;
		if (buffered + length < 32) {
			// not enough for a full 32 byte block, keep buffering
			data.copy(mem, buffered, 0, length);
			this.buffered += length;
		} else {
			// number of bytes (including the buffered ones) that can be hashed now
			const l = ((buffered + length) >> 5) << 5;
			if (l > 65536) {
				// more than fits into the memory at once: hash in 64 KiB chunks
				// first chunk: fill the memory up behind the buffered bytes
				let i = 65536 - buffered;
				data.copy(mem, buffered, 0, i);
				exports.update(65536);
				// full chunks
				const stop = l - buffered - 65536;
				while (i < stop) {
					data.copy(mem, 0, i, i + 65536);
					exports.update(65536);
					i += 65536;
				}
				// last chunk, a (possibly empty) multiple of 32 bytes
				data.copy(mem, 0, i, l - buffered);
				exports.update(l - buffered - i);
			} else {
				data.copy(mem, buffered, 0, l - buffered);
				exports.update(l);
			}
			const newBuffered = length + buffered - l;
			this.buffered = newBuffered;
			// stage the unhashed tail at the start of the memory
			if (newBuffered > 0) data.copy(mem, 0, length - newBuffered, length);
		}
	}

	/**
	 * Finishes the hash and returns this instance to the pool, so it must
	 * not be used afterwards.
	 * @param {string=} type encoding of the digest, a Buffer is returned when omitted
	 * @returns {string | Buffer} digest
	 */
	digest(type) {
		const { exports, buffered, mem } = this;
		exports.final(buffered);
		instancesPool.push(this);
		// the wasm code leaves the digest as 16 hex characters at the start of the memory
		const hex = mem.toString("latin1", 0, 16);
		if (type === "hex") return hex;
		const raw = Buffer.from(hex, "hex");
		return type ? raw.toString(type) : raw;
	}
}
// Hash instances that finished a digest and can be reused
const instancesPool = [];

/**
 * Provides a ready-to-use hash: a recycled instance from the pool when one is
 * available, otherwise a new one backed by a new wasm instance.
 * @returns {XxHash64} hash
 */
const create = () => {
	const recycled = instancesPool.pop();
	if (recycled !== undefined) {
		recycled.reset();
		return recycled;
	}
	return new XxHash64(new WebAssembly.Instance(xxhash64));
};

module.exports = create;

View File

@ -41,6 +41,7 @@
"@types/es-module-lexer": "^0.4.1",
"@types/jest": "^27.0.1",
"@types/node": "^15.0.1",
"assemblyscript": "^0.19.16",
"babel-loader": "^8.1.0",
"benchmark": "^2.1.4",
"bundle-loader": "^0.5.6",
@ -61,6 +62,7 @@
"eslint-plugin-prettier": "^4.0.0",
"file-loader": "^6.0.0",
"fork-ts-checker-webpack-plugin": "^6.0.5",
"hash-wasm": "^4.9.0",
"husky": "^6.0.0",
"is-ci": "^3.0.0",
"istanbul": "^0.4.5",
@ -156,8 +158,8 @@
"type-lint": "tsc",
"typings-lint": "tsc -p tsconfig.test.json",
"spellcheck": "cspell \"{.github,benchmark,bin,examples,hot,lib,schemas,setup,tooling}/**/*.{md,yml,yaml,js,json}\" \"*.md\"",
"special-lint": "node node_modules/tooling/lockfile-lint && node node_modules/tooling/schemas-lint && node node_modules/tooling/inherit-types && node node_modules/tooling/format-schemas && node tooling/generate-runtime-code.js && node node_modules/tooling/format-file-header && node node_modules/tooling/compile-to-definitions && node node_modules/tooling/precompile-schemas && node node_modules/tooling/generate-types --no-template-literals",
"special-lint-fix": "node node_modules/tooling/inherit-types --write && node node_modules/tooling/format-schemas --write && node tooling/generate-runtime-code.js --write && node node_modules/tooling/format-file-header --write && node node_modules/tooling/compile-to-definitions --write && node node_modules/tooling/precompile-schemas --write && node node_modules/tooling/generate-types --no-template-literals --write",
"special-lint": "node node_modules/tooling/lockfile-lint && node node_modules/tooling/schemas-lint && node node_modules/tooling/inherit-types && node node_modules/tooling/format-schemas && node tooling/generate-runtime-code.js && node tooling/generate-wasm-code.js && node node_modules/tooling/format-file-header && node node_modules/tooling/compile-to-definitions && node node_modules/tooling/precompile-schemas && node node_modules/tooling/generate-types --no-template-literals",
"special-lint-fix": "node node_modules/tooling/inherit-types --write && node node_modules/tooling/format-schemas --write && node tooling/generate-runtime-code.js --write && node tooling/generate-wasm-code.js --write && node node_modules/tooling/format-file-header --write && node node_modules/tooling/compile-to-definitions --write && node node_modules/tooling/precompile-schemas --write && node node_modules/tooling/generate-types --no-template-literals --write",
"fix": "yarn code-lint --fix && yarn special-lint-fix && yarn pretty-lint-fix",
"prepare": "husky install",
"pretty-lint-base": "prettier \"*.{ts,json,yml,yaml,md}\" \"{setup,lib,bin,hot,benchmark,tooling,schemas}/**/*.json\" \"examples/*.md\"",
@ -209,6 +211,8 @@
"<rootDir>/test/fixtures/temp-cache-fixture",
"<rootDir>/test/fixtures/temp-",
"<rootDir>/benchmark",
"<rootDir>/assembly",
"<rootDir>/tooling",
"<rootDir>/examples/*/dist",
"<rootDir>/coverage",
"<rootDir>/.eslintcache"

69
test/XxHash64.unittest.js Normal file
View File

@ -0,0 +1,69 @@
const createHash = require("../lib/util/hash/xxhash64");
const { randomBytes } = require("crypto");
const createReferenceHash =
require("hash-wasm/dist/xxhash64.umd.min.js").createXXHash64;
describe("xxhash64", () => {
	// Update sizes around the 32 byte block size of the algorithm and around
	// the 65536 byte staging buffer of the implementation
	const sizes = [
		1,
		2,
		3,
		4,
		5,
		7,
		8,
		9,
		16,
		31,
		32,
		33,
		64,
		100,
		1000,
		65536 - 1,
		65536,
		65536 + 1,
		65536 + 31,
		65536 * 5,
		65536 * 7 - 1,
		65536 * 9 + 31
	];
	const reversedSizes = sizes.slice().reverse();

	/**
	 * Registers a test case that feeds random chunks of the given sizes into
	 * the implementation (once as Buffers, once as base64 strings) and into
	 * the reference implementation and compares the digests.
	 * @param {string} name prefix of the test title
	 * @param {number[]} updateSizes size of each update call in bytes
	 * @returns {void}
	 */
	const defineCase = (name, updateSizes) => {
		it(`${name} should generate a hash from binary data`, async () => {
			const fromBuffers = createHash();
			const fromStrings = createHash();
			const referenceFactory = await createReferenceHash();
			const reference = referenceFactory.init();

			updateSizes.forEach(size => {
				const chunk = randomBytes(size);
				fromBuffers.update(chunk);
				fromStrings.update(chunk.toString("base64"), "base64");
				reference.update(chunk);
			});

			const result = fromBuffers.digest("hex");
			expect(result).toMatch(/^[0-9a-f]{16}$/);

			const resultFromString = fromStrings.digest("hex");
			expect(resultFromString).toMatch(/^[0-9a-f]{16}$/);

			const expected = reference.digest("hex");
			expect(result).toBe(expected);
			expect(resultFromString).toBe(expected);
		});
	};

	defineCase("empty hash", []);

	sizes.forEach(size => {
		defineCase(`single update ${size} bytes`, [size]);
	});

	sizes.forEach(size1 => {
		sizes.forEach(size2 => {
			defineCase(`two updates ${size1} + ${size2} bytes`, [size1, size2]);
		});
	});

	defineCase(`many updates 1`, sizes);
	defineCase(`many updates 2`, reversedSizes);
	defineCase(`many updates 3`, sizes.concat(reversedSizes));
	defineCase(`many updates 4`, reversedSizes.concat(sizes));
});

View File

@ -0,0 +1,89 @@
const path = require("path");
const fs = require("fs");
const asc = require("assemblyscript/cli/asc");
// When --write is set, files will be written in place
// Otherwise it only prints outdated files
const doWrite = process.argv.includes("--write");

// Files containing `//#region wasm code: <identifier> (<source>) <flags>`
// sections, whose content is generated from the referenced source file
const files = ["lib/util/hash/xxhash64.js"];

(async () => {
	await asc.ready;
	for (const file of files) {
		const filePath = path.resolve(__dirname, "..", file);
		const content = fs.readFileSync(filePath, "utf-8");

		// captures: identifier, source path (relative to the file), compiler flags
		const regexp =
			/\n\/\/#region wasm code: (.+) \((.+)\)(.*)\n[\s\S]+?\/\/#endregion\n/g;

		// maps the current text of each region to its regenerated text
		const replaces = new Map();

		let match = regexp.exec(content);
		while (match) {
			const [fullMatch, identifier, name, flags] = match;

			const sourcePath = path.resolve(filePath, "..", name);
			// the compiler output is placed next to the source file and keeps
			// the complete source file name as base name
			const sourcePathBase = path.join(
				path.dirname(sourcePath),
				path.basename(sourcePath)
			);

			await new Promise((resolve, reject) => {
				asc.main(
					[
						sourcePath,
						// cspell:word Ospeed
						"-Ospeed",
						"--noAssert",
						"--converge",
						"--textFile",
						sourcePathBase + ".wat",
						"--binaryFile",
						sourcePathBase + ".wasm",
						// additional flags from the region header
						...flags.split(" ").filter(Boolean)
					],
					{
						stdout: process.stdout,
						stderr: process.stderr
					},
					err => {
						if (err) return reject(err), 0;
						resolve();
						return 0;
					}
				);
			});

			const wasm = fs.readFileSync(sourcePathBase + ".wasm");

			replaces.set(
				fullMatch,
				`
//#region wasm code: ${identifier} (${name})${flags}
const ${identifier} = new WebAssembly.Module(
Buffer.from(
// ${wasm.length} bytes
${JSON.stringify(wasm.toString("base64"))},
"base64"
)
);
//#endregion
`
			);
			match = regexp.exec(content);
		}

		// a replacer function is used so the generated text is inserted literally
		const newContent = content.replace(regexp, match => replaces.get(match));

		if (newContent !== content) {
			if (doWrite) {
				fs.writeFileSync(filePath, newContent, "utf-8");
				console.error(`${file} updated`);
			} else {
				console.error(`${file} need to be updated`);
				process.exitCode = 1;
			}
		}
	}
})().catch(err => {
	// Without this handler a failed compilation or file access is only an
	// unhandled rejection, which older Node.js versions report as a warning
	// while still exiting with code 0, so the lint chain would pass.
	console.error(err);
	process.exitCode = 1;
});

View File

@ -1382,6 +1382,14 @@ asn1@~0.2.3:
dependencies:
safer-buffer "~2.1.0"
assemblyscript@^0.19.16:
version "0.19.16"
resolved "https://registry.yarnpkg.com/assemblyscript/-/assemblyscript-0.19.16.tgz#fc06c9892755775e8e31a59249fbc361fd49e1d1"
integrity sha512-AMNdwcat+EEsxjkVQ5vOE/lDbXBvy1swQKAuMG2Ken+DZufZH7wKHIAVKR5liteW/jLL3T971l1MN+onP/bixA==
dependencies:
binaryen "101.0.0-nightly.20210904"
long "^4.0.0"
assert-never@^1.2.1:
version "1.2.1"
resolved "https://registry.yarnpkg.com/assert-never/-/assert-never-1.2.1.tgz#11f0e363bf146205fb08193b5c7b90f4d1cf44fe"
@ -1530,6 +1538,11 @@ binary-extensions@^2.0.0:
resolved "https://registry.yarnpkg.com/binary-extensions/-/binary-extensions-2.1.0.tgz#30fa40c9e7fe07dbc895678cd287024dea241dd9"
integrity sha512-1Yj8h9Q+QDF5FzhMs/c9+6UntbD5MkRfRwac8DoEm9ZfUBZ7tZ55YcGVAzEe4bXsdQHEk+s9S5wsOKVdZrw0tQ==
binaryen@101.0.0-nightly.20210904:
version "101.0.0-nightly.20210904"
resolved "https://registry.yarnpkg.com/binaryen/-/binaryen-101.0.0-nightly.20210904.tgz#58a7990d6d64b16567f376a1fe47d8aea6698b14"
integrity sha512-2AvJhErttuoMvgNcYPPpPy7C12PSvDdtZWtEeX/Otm/Vtf4ePvBpT3UIA00hGAh8HNaGr+dzFNstxTUvjNwZTg==
brace-expansion@^1.1.7:
version "1.1.11"
resolved "https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-1.1.11.tgz#3c7fcbf529d87226f3d2f52b966ff5271eb441dd"
@ -3011,6 +3024,11 @@ has@^1.0.3:
dependencies:
function-bind "^1.1.1"
hash-wasm@^4.9.0:
version "4.9.0"
resolved "https://registry.yarnpkg.com/hash-wasm/-/hash-wasm-4.9.0.tgz#7e9dcc9f7d6bd0cc802f2a58f24edce999744206"
integrity sha512-7SW7ejyfnRxuOc7ptQHSf4LDoZaWOivfzqw+5rpcQku0nHfmicPKE51ra9BiRLAmT8+gGLestr1XroUkqdjL6w==
hasha@^5.0.0:
version "5.2.2"
resolved "https://registry.yarnpkg.com/hasha/-/hasha-5.2.2.tgz#a48477989b3b327aea3c04f53096d816d97522a1"
@ -4189,6 +4207,11 @@ log-update@^4.0.0:
slice-ansi "^4.0.0"
wrap-ansi "^6.2.0"
long@^4.0.0:
version "4.0.0"
resolved "https://registry.yarnpkg.com/long/-/long-4.0.0.tgz#9a7b71cfb7d361a194ea555241c92f7468d5bf28"
integrity sha512-XsP+KhQif4bjX1kbuSiySJFNAehNxgLb6hPRGJ9QsUr8ajHkuXGdrHmFUTUUXhDwVX2R5bY4JNZEwbUiMhV+MA==
loose-envify@^1.1.0:
version "1.4.0"
resolved "https://registry.yarnpkg.com/loose-envify/-/loose-envify-1.4.0.tgz#71ee51fa7be4caec1a63839f7e682d8132d30caf"