fix some edge cases in deterministicGrouping and add more tests

This commit is contained in:
Tobias Koppers 2021-03-31 16:37:33 +02:00
parent 3286203141
commit 3d90836f29
2 changed files with 224 additions and 4 deletions

View File

@ -216,6 +216,7 @@ class Group {
lastNode = node;
}
}
if (resultNodes.length === this.nodes.length) return undefined;
this.nodes = newNodes;
this.similarities = newSimilarities;
this.size = sumSize(newNodes);
@ -296,14 +297,15 @@ module.exports = ({ maxSize, minSize, items, getSize, getKey }) => {
if (initialNodes.length > 0) {
const initialGroup = new Group(initialNodes, getSimilarities(initialNodes));
const removeProblematicNodes = group => {
const problemTypes = getTooSmallTypes(group.size, minSize);
const removeProblematicNodes = (group, consideredSize = group.size) => {
const problemTypes = getTooSmallTypes(consideredSize, minSize);
if (problemTypes.size > 0) {
// We hit an edge case where the working set is already smaller than minSize
// We merge problematic nodes with the smallest result node to keep minSize intact
const problemNodes = group.popNodes(
n => getNumberOfMatchingSizeTypes(n.size, problemTypes) > 0
);
if (problemNodes === undefined) return false;
// Only merge it with result nodes that have the problematic size type
const possibleResultGroups = result.filter(
n => getNumberOfMatchingSizeTypes(n.size, problemTypes) > 0
@ -377,7 +379,31 @@ module.exports = ({ maxSize, minSize, items, getSize, getKey }) => {
right--;
}
// left v v right
// [ O O O ] O O O [ O O O ]
// ^^^^^^^^^ leftSize
// rightSize ^^^^^^^^^
// leftSize > minSize
// rightSize > minSize
// Perfect split: [ O O O ] [ O O O ]
// right === left - 1
if (left - 1 > right) {
// We try to remove some problematic nodes to "fix" that
let prevSize;
if (right < group.nodes.length - left) {
subtractSizeFrom(rightSize, group.nodes[right + 1].size);
prevSize = rightSize;
} else {
subtractSizeFrom(leftSize, group.nodes[left - 1].size);
prevSize = leftSize;
}
if (removeProblematicNodes(group, prevSize)) {
// This changed something, so we try this group again
queue.push(group);
continue;
}
// can't split group while holding minSize
// because minSize is preferred of maxSize we return
// the problematic nodes as result here even while it's too big
@ -395,7 +421,13 @@ module.exports = ({ maxSize, minSize, items, getSize, getKey }) => {
let bestSimilarity = Infinity;
let pos = left;
let rightSize = sumSize(group.nodes.slice(pos));
while (pos <= right) {
// pos v v right
// [ O O O ] O O O [ O O O ]
// ^^^^^^^^^ leftSize
// rightSize ^^^^^^^^^^^^^^^
while (pos <= right + 1) {
const similarity = group.similarities[pos - 1];
if (
similarity < bestSimilarity &&
@ -410,7 +442,9 @@ module.exports = ({ maxSize, minSize, items, getSize, getKey }) => {
pos++;
}
if (best < 0) {
// can't split group while holding minSize
// This can't happen
// but if that assumption is wrong
// fallback to a big group
result.push(group);
continue;
}

View File

@ -0,0 +1,186 @@
const deterministicGrouping = require("../lib/util/deterministicGrouping");
describe("deterministicGrouping", () => {
const group = (items, minSize, maxSize) => {
return deterministicGrouping({
items: items.map((item, i) => [i, item]),
minSize,
maxSize,
getKey: ([key]) => `${100000 + key}`,
getSize: ([, size]) => size
}).map(group => ({ items: group.items.map(([i]) => i), size: group.size }));
};
it("should split large chunks with different size types", () => {
expect(
group(
[{ a: 3, b: 3 }, { b: 1 }, { a: 3 }],
{
a: 3,
b: 3
},
{
a: 5,
b: 5
}
)
).toMatchInlineSnapshot(`
Array [
Object {
"items": Array [
0,
1,
],
"size": Object {
"a": 3,
"b": 4,
},
},
Object {
"items": Array [
2,
],
"size": Object {
"a": 3,
},
},
]
`);
});
it("should separate items with different size types when unsplittable", () => {
expect(
group(
[
{ a: 1 },
{ b: 1 },
{ a: 1 },
{ a: 1 },
{ b: 1 },
{ a: 1 },
{ a: 1 },
{ b: 1 },
{ a: 1 },
{ a: 1 }
],
{
a: 3,
b: 3
},
{
a: 5,
b: 5
}
)
).toMatchInlineSnapshot(`
Array [
Object {
"items": Array [
0,
2,
3,
],
"size": Object {
"a": 3,
},
},
Object {
"items": Array [
1,
4,
7,
],
"size": Object {
"b": 3,
},
},
Object {
"items": Array [
5,
6,
8,
9,
],
"size": Object {
"a": 4,
},
},
]
`);
});
it("should handle entangled size types (case 1)", () => {
expect(
group(
[
{ c: 2, b: 2 },
{ a: 2, c: 2 },
{ a: 2, b: 2 }
],
{
a: 3,
b: 3,
c: 3
},
{
a: 3,
b: 3,
c: 3
}
)
).toMatchInlineSnapshot(`
Array [
Object {
"items": Array [
0,
1,
2,
],
"size": Object {
"a": 4,
"b": 4,
"c": 4,
},
},
]
`);
});
it("should handle entangled size types (case 2)", () => {
expect(
group(
[
{ c: 2, b: 2 },
{ a: 2, c: 2 },
{ a: 2, b: 2 }
],
{
a: 3,
b: 3
},
{
c: 3
}
)
).toMatchInlineSnapshot(`
Array [
Object {
"items": Array [
0,
2,
],
"size": Object {
"a": 2,
"b": 4,
"c": 2,
},
},
Object {
"items": Array [
1,
],
"size": Object {
"a": 2,
"c": 2,
},
},
]
`);
});
});