Skip to content

Commit 530be52

Browse files
authored
Fancier caching of selection get (#195)
We've seen some performance issues when calling selection.get very often; this should speed it up a bit. We already cache the tile bitmasks, but currently for each Selection.get we have to identify where the nth TRUE element is for one tile. This is currently handled by calling get at each individual position in the tile until the nth set value is found. This introduces two improvements. First, it calls bitmask.which(), which returns a Uint16Array with the locations of all the set bits. Ordinarily we try to avoid materializing this because for a fully set mask it's 16x larger than the bitmask itself. But for a nearly empty bitmask it iterates much faster, because it works a byte at a time instead of a bit at a time. Second, we cache the output of bitmask.which(), so we don't have to run it repeatedly at all. The 1000-entry FIFO cache is global for deepscatter: assuming 50,000 items per bitmask and 50% filled bitmasks, that could be 100MB of CPU data in this cache. That ain't nothing! I might also make the cache expire after 1-10 seconds to avoid gunking things up too much. <!-- ELLIPSIS_HIDDEN --> ---- > [!IMPORTANT] > Improves `Selection.get` performance by caching bitmask indices using a new `FifoTupleMap` class. > > - **Caching**: > - Introduces `FifoTupleMap` in `utilityFunctions.ts` for FIFO caching. > - Caches `bitmask.which()` results in `cachedWhiches` in `selection.ts` to improve `Selection.get` performance. > - **Behavior**: > - `Selection.get` in `selection.ts` now uses cached bitmask indices, reducing repeated calculations. > - **Misc**: > - Adds `check` script to `package.json` for building and testing. > > <sup>This description was created by </sup>[<img alt="Ellipsis" src="https://img.shields.io/badge/Ellipsis-blue?color=175173">](https://www.ellipsis.dev?ref=nomic-ai%2Fdeepscatter&utm_source=github&utm_medium=referral)<sup> for 3b055be. You can [customize](https://app.ellipsis.dev/nomic-ai/settings/summaries) this summary. It will automatically update as commits are pushed.</sup> <!-- ELLIPSIS_HIDDEN -->
1 parent 1f00993 commit 530be52

File tree

3 files changed

+49
-18
lines changed

3 files changed

+49
-18
lines changed

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
},
1919
"scripts": {
2020
"dev": "vite --mode dev --port 3344 --host",
21+
"check": "npm run build && npm run test",
2122
"format": "prettier --write src",
2223
"prepare": "npm run build",
2324
"build": "vite build && tsc",

src/selection.ts

Lines changed: 21 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ import { getTileFromRow } from './tixrixqid';
66
import type * as DS from './types';
77
import { Bool, StructRowProxy, Utf8, Vector, makeData } from 'apache-arrow';
88
import { bisectLeft, bisectRight, range } from 'd3-array';
9-
import { MinHeap } from './utilityFunctions';
9+
import { FifoTupleMap, MinHeap } from './utilityFunctions';
1010
interface SelectParams {
1111
name: string;
1212
useNameCache?: boolean; // If true and a selection with that name already exists, use it and ignore all passed parameters. Otherwise, throw an error.
@@ -382,6 +382,7 @@ class SelectionTile {
382382
* more than once a second or so.
383383
*/
384384

385+
const cachedWhiches = new FifoTupleMap<SelectionTile, Uint16Array>(250);
385386
export class DataSelection {
386387
deeptable: Deeptable;
387388
plot: Scatterplot;
@@ -818,31 +819,33 @@ export class DataSelection {
818819
i = this.selectionSize + i;
819820
}
820821
let currentOffset = 0;
821-
let relevantTile: Tile | undefined = undefined;
822-
for (const { tile, matchCount } of this.tiles) {
823-
if (i < currentOffset + matchCount) {
824-
relevantTile = tile;
822+
let relevantTile: SelectionTile | undefined = undefined;
823+
for (const selectionTile of this.tiles) {
824+
if (i < currentOffset + selectionTile.matchCount) {
825+
relevantTile = selectionTile;
825826
break;
826827
}
827-
currentOffset += matchCount;
828+
currentOffset += selectionTile.matchCount;
828829
}
829830
if (relevantTile === undefined) {
830831
return undefined;
831832
}
832-
const column = relevantTile.record_batch.getChild(
833-
this.name,
834-
) as Vector<Bool>;
833+
835834
const offset = i - currentOffset;
836-
let ix_in_match = 0;
837-
for (let j = 0; j < column.length; j++) {
838-
if (column.get(j)) {
839-
if (ix_in_match === offset) {
840-
return relevantTile.record_batch.get(j) || undefined;
841-
}
842-
ix_in_match++;
843-
}
835+
let cached = cachedWhiches.get([relevantTile]);
836+
if (cached === undefined) {
837+
const mask = Bitmask.from_arrow(relevantTile.bitmask);
838+
cached = mask.which();
839+
cachedWhiches.set([relevantTile], cached)
844840
}
845-
throw new Error(`unable to locate point ${i}`);
841+
842+
if (offset >= cached.length) {
843+
throw new Error(`unable to locate point ${i}`);
844+
}
845+
const tix = cached[offset];
846+
847+
return relevantTile.tile.record_batch.get(tix) || undefined;
848+
846849
}
847850

848851
// Iterate over the points in raw order.

src/utilityFunctions.ts

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,12 @@ export class TupleMap<K = object, V = object> {
107107
}
108108
}
109109

110+
get size(): number {
111+
return this.map.size;
112+
}
113+
110114
set(keys: Some<K>, value: V): void {
115+
// eslint-disable-next-line @typescript-eslint/no-this-alias
111116
let currentMap: TupleMap<K, V> = this;
112117
for (const key of keys) {
113118
if (!currentMap.map.has(key)) {
@@ -329,3 +334,25 @@ export class MinHeap<T> {
329334
}
330335
}
331336
}
337+
338+
339+
/**
 * A TupleMap with a size cap, intended for use as a first-in-first-out cache.
 *
 * Every `set` stores the entry through the parent class and then, if
 * `this.size` has grown past `max`, removes the first key produced by
 * `this.keys()`.
 *
 * NOTE(review): eviction is only FIFO if `TupleMap.keys()` yields keys in
 * insertion order -- presumably it does (it looks to be backed by a Map), but
 * that method is not visible here; confirm.
 * NOTE(review): `size` appears to be the inherited getter that reads
 * `this.map.size`; verify that it counts what is intended when keys are
 * tuples with more than one element.
 */
export class FifoTupleMap<K, V> extends TupleMap<K, V> {
340+
// private f: (key: K, arg: ArgT) => V;
341+
// Size threshold: `set` evicts one entry whenever `this.size` exceeds this value.
max: number;
342+
/**
 * @param max - Largest `size` the map may have before `set` starts evicting.
 */
constructor(max: number) {
343+
super();
344+
this.max = max;
345+
}
346+
347+
/**
 * Stores `value` under `key`, then evicts at most one entry if the map has
 * grown past `max`.
 *
 * @param key - Tuple of keys, forwarded unchanged to `TupleMap.set`.
 * @param value - Value to store.
 * @returns This map, so calls can be chained.
 */
set(key: Some<K>, value: V) {
348+
super.set(key, value);
349+
// Only a single entry is removed per call, so the map shrinks by at most one here.
if (this.size > this.max) {
350+
const next = this.keys().next();
351+
// The truthiness check skips eviction when the iterator produced no value.
if (next.value) {
352+
// using 'as' here because we lose the type in the iteration.
353+
this.delete(next.value as Some<K>);
354+
}
355+
}
356+
return this;
357+
}
358+
}

0 commit comments

Comments
 (0)