Skip to content

Commit f49f0a5

Browse files
authored
qid as number, number and cleanup fromArrow (#197)
Two related goals in this one. 1. It has turned out to be more useful to refer to points by their quadtree id ("qid"), the tuple of the tile number and row number. This makes mouseover function return that tuple instead, and changes the definition to be [number, number] instead of [number, Some<number>]. Internal Atlas code may have to change to accommodate this. 2. The tile Manifest loading code now works on Arrow tiles as well. We are moving to a world where the existence of a manifest is a hard requirement in deepscatter. I think this is OK. <!-- ELLIPSIS_HIDDEN --> ---- > [!IMPORTANT] > This PR updates QID handling to use consistent tuples and enhances manifest loading for Arrow tiles, affecting several core functions and tests. > > - **Behavior**: > - `Qid` is now consistently a tuple `[number, number]` instead of `[number, Some<number>]`. > - Mouseover functions return QID tuples. > - Manifest loading now supports Arrow tiles, making manifests mandatory. > - **Functions**: > - `getQids()` in `Deeptable.ts` retrieves `StructRowProxy` for QID tuples. > - `color_pick()` in `regl_rendering.ts` returns QID tuples. > - `set_highlit_points()` in `interaction.ts` uses QID tuples. > - **Misc**: > - Update `max_points` and `source_url` in `FourClasses.svelte`. > - Replace `manifest` with `metadata` in `SelectPoints.svelte` and `dataset.spec.js`. > > <sup>This description was created by </sup>[<img alt="Ellipsis" src="https://img.shields.io/badge/Ellipsis-blue?color=175173">](https://www.ellipsis.dev?ref=nomic-ai%2Fdeepscatter&utm_source=github&utm_medium=referral)<sup> for 62ea242. You can [customize](https://app.ellipsis.dev/nomic-ai/settings/summaries) this summary. It will automatically update as commits are pushed.</sup> <!-- ELLIPSIS_HIDDEN -->
1 parent 520c3c3 commit f49f0a5

File tree

10 files changed

+78
-34
lines changed

10 files changed

+78
-34
lines changed

dev/FourClasses.svelte

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88
import SelectPoints from './svelte/SelectPoints.svelte';
99
const startSize = 2;
1010
const prefs = {
11-
source_url: '/newtiles',
12-
max_points: 10,
11+
source_url: '/tiles',
12+
max_points: 10000,
1313
alpha: 35, // Target saturation for the full page.
1414
zoom_balance: 0.22, // Rate at which points increase size. https://observablehq.com/@bmschmidt/zoom-strategies-for-huge-scatterplots-with-three-js
1515
point_size: startSize, // Default point size before application of size scaling

dev/svelte/SelectPoints.svelte

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88
const selection = await scatterplot.deeptable.select_data({
99
name: Math.random().toFixed(8),
1010
tileFunction: async (tile) => {
11-
const b = new Bitmask(tile.manifest.nPoints);
12-
for (let i = 0; i < tile.manifest.nPoints; i++) {
11+
const b = new Bitmask(tile.metadata.nPoints);
12+
for (let i = 0; i < tile.metadata.nPoints; i++) {
1313
if (Math.random() < 0.001) {
1414
b.set(i);
1515
}

src/Deeptable.ts

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ import type {
3737
import { DataSelection } from './selection';
3838
import { Some, TupleMap } from './utilityFunctions';
3939
import { getNestedVector } from './regl_rendering';
40-
import { tileKey_to_tix } from './tixrixqid';
40+
import { Qid, tileKey_to_tix } from './tixrixqid';
4141

4242
type TransformationStatus = 'queued' | 'in progress' | 'complete' | 'failed';
4343

@@ -962,6 +962,25 @@ export class Deeptable {
962962
}
963963
};
964964
}
965+
966+
getQids(qids: Qid[]): StructRowProxy[] {
967+
// For a set of tile row tuples, returns underlying
968+
// structRowProxies. Note that this makes no concrete guarantee about
969+
// **which** fields will be loaded in the underlying arrow arrays.
970+
const matches : StructRowProxy[] = []
971+
for (const [tix, rix] of qids) {
972+
if (this.flatTree[tix] === undefined) {
973+
// If we can't immediately find the tile, check if it's hiding
974+
const tile = this.map(t => t).find(t => t.tix === tix)
975+
this.flatTree[tix] = tile || null;
976+
}
977+
if (this.flatTree[tix] === null) {
978+
throw new Error("Missing tile index " + tix)
979+
}
980+
matches.push(this.flatTree[tix].record_batch.get(rix))
981+
}
982+
return matches
983+
}
965984
}
966985

967986
function area(rect: Rectangle) {

src/interaction.ts

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import type { Deeptable } from './Deeptable';
1515
import type * as DS from './types';
1616
import type { Scatterplot } from './scatterplot';
1717
import { PositionalAesthetic } from './aesthetics/ScaledAesthetic';
18+
import { Qid } from './tixrixqid';
1819
type Annotation = {
1920
x: number;
2021
y: number;
@@ -181,10 +182,12 @@ export class Zoom {
181182
this.zoomer = zoomer;
182183
}
183184

184-
set_highlit_points(data: StructRowProxy[]) {
185+
set_highlit_points(dd: Qid[]) {
185186
const { x_, y_ } = this.scales();
186187
const xdim = this.scatterplot.dim('x') as PositionalAesthetic;
187188
const ydim = this.scatterplot.dim('y') as PositionalAesthetic;
189+
190+
const data = this.scatterplot.deeptable.getQids(dd)
188191
this.scatterplot.highlit_point_change(data, this.scatterplot);
189192

190193
const annotations: Annotation[] = data.map((d) => {
@@ -226,10 +229,6 @@ export class Zoom {
226229
});
227230
}
228231

229-
set_highlit_point(point: StructRowProxy) {
230-
this.set_highlit_points([point]);
231-
}
232-
233232
add_mouseover() {
234233
let last_fired = 0;
235234
const renderer: ReglRenderer = this.renderers.get('regl') as ReglRenderer;

src/regl_rendering.ts

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@ import {
3333
Data,
3434
Dictionary,
3535
Struct,
36-
StructRowProxy,
3736
Type,
3837
Utf8,
3938
Vector,
@@ -43,6 +42,7 @@ import { StatefulAesthetic } from './aesthetics/StatefulAesthetic';
4342
import { Filter, Foreground } from './aesthetics/BooleanAesthetic';
4443
import { ZoomTransform } from 'd3-zoom';
4544
import { Some, TupleMap, TupleSet } from './utilityFunctions';
45+
import { Qid } from './tixrixqid';
4646
// eslint-disable-next-line import/prefer-default-export
4747
export class ReglRenderer extends Renderer {
4848
public regl: Regl;
@@ -546,7 +546,7 @@ export class ReglRenderer extends Renderer {
546546
return v;
547547
}
548548

549-
color_pick(x: number, y: number): null | StructRowProxy {
549+
color_pick(x: number, y: number): null | Qid {
550550
if (y === 0) {
551551
// Not sure why, but this makes things complainy.
552552
// console.warn('that thing again.');
@@ -561,15 +561,7 @@ export class ReglRenderer extends Renderer {
561561
if (row_number === -1) {
562562
return null;
563563
}
564-
for (const tile of this.visible_tiles()) {
565-
if (tile.tix === tile_number) {
566-
return tile.record_batch.get(row_number);
567-
}
568-
}
569-
return null;
570-
// const p = this.tileSet.findPoint(point_as_int);
571-
// if (p.length === 0) { return; }
572-
// return p[0];
564+
return [tile_number, row_number]
573565
}
574566

575567
color_pick_single(

src/tile.ts

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ export class Tile {
7878
* @param key Either the string identifier of the tile,
7979
* OR a `TileManifest` object including an identifier. *
8080
* @param parent The parent tile -- used to navigate through the tree.
81-
* @param deeptable The full atlas deeptable of which this tile is a part.
81+
* @param deeptable The full deepscatter deeptable of which this tile is a part.
8282
*/
8383
constructor(key: string, parent: Tile | null, deeptable: Deeptable) {
8484
// If it's just initiated with a key, build that into a minimal manifest.
@@ -295,16 +295,16 @@ export class Tile {
295295
}
296296

297297
set metadata(
298-
manifest: TileMetadata | (TileMetadata & { extent: Rectangle }),
298+
metadata: TileMetadata | (TileMetadata & { extent: Rectangle }),
299299
) {
300300
// Setting the manifest is the thing that spawns children.
301-
this.highest_known_ix = manifest.max_ix;
301+
this.highest_known_ix = metadata.max_ix;
302302
this._metadata = {
303-
...manifest,
303+
...metadata,
304304
extent:
305-
typeof manifest.extent === 'string'
306-
? (JSON.parse(manifest.extent) as Rectangle)
307-
: manifest.extent,
305+
typeof metadata.extent === 'string'
306+
? (JSON.parse(metadata.extent) as Rectangle)
307+
: metadata.extent,
308308
};
309309
}
310310

src/tixrixqid.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ import type {
88
} from 'apache-arrow';
99

1010
import type { Tile } from './deepscatter';
11-
import { Bitmask, DataSelection, Deeptable } from './deepscatter';
11+
import { DataSelection, Deeptable } from './deepscatter';
1212

1313
// The type below indicates that a Qid is not valid if
1414
// there are zero rows selected in the tile.

src/types.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import type {
1111
Timestamp,
1212
Utf8,
1313
Vector,
14+
TypeMap,
1415
} from 'apache-arrow';
1516
import type { Renderer } from './rendering';
1617
import type { Deeptable } from './Deeptable';
@@ -45,6 +46,9 @@ export type PointFunction<T = number> = (p: StructRowProxy) => T;
4546
* A proxy class that wraps around tile get calls. Used to avoid
4647
* putting Nomic login logic in deepscatter while fetching
4748
* tiles with authentication.
49+
*
50+
* An API call type must return Uint8Arrays that represent arrow
51+
* tables of the type produced by quadfeather.
4852
*
4953
*/
5054
export interface TileProxy {
@@ -101,7 +105,7 @@ export type DeeptableCreateParams = {
101105

102106
// A manifest listing all the tiles in the deeptable, of the type created by
103107
// quadfeather v2.0.0 or greater.
104-
tileManifest?: Table;
108+
tileManifest?: Table<{key: Utf8}>;
105109

106110
// A URL for an arrow file manifest. The schema for this manifest
107111
// is not yet publicly documented: I hope to bundle it into the

src/wrap_arrow.ts

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,15 @@ import {
88
vectorFromArray,
99
Vector,
1010
Float,
11+
tableFromArrays,
12+
Utf8,
1113
} from 'apache-arrow';
1214
import { Deeptable } from './Deeptable';
1315
import { add_or_delete_column } from './Deeptable';
1416
import type * as DS from './types';
15-
import { extent } from 'd3-array';
17+
import { extent, extent, range } from 'd3-array';
1618
import { Rectangle } from './tile';
19+
import { tixToZxy } from './tixrixqid';
1720

1821
/**
1922
* This function is used to wrap an arrow table into a
@@ -31,22 +34,36 @@ export function wrapArrowTable(
3134
): Deeptable {
3235
let tb = tableFromIPC(tbArray);
3336
let batches = tb.batches;
37+
const minIx = []
38+
const maxIx = []
39+
// Extents of each tile, as JSON.
40+
const extents : string[] = []
3441
if (tb.getChild('ix') === null) {
3542
let rowNum = 0;
3643
batches = batches.map((batch) => {
3744
if (batch.numRows > 2 ** 16) {
3845
throw new Error(
39-
'Arrow record batches temporarily limited to 2^16 rows.',
46+
'Arrow record batches limited to 2^16 rows.',
4047
);
4148
}
4249
const array = new Int32Array(batch.numRows);
4350
for (let i = 0; i < batch.numRows; i++) {
4451
array[i] = rowNum++;
4552
}
46-
return add_or_delete_column(batch, 'ix', vectorFromArray(array));
53+
return add_or_delete_column(batch, 'ix', vectorFromArray(array));
4754
});
4855
tb = new Table(batches);
4956
}
57+
for (const batch of batches) {
58+
minIx.push(batch.get(0)['ix'])
59+
maxIx.push(batch.get(batch.numRows - 1)['ix'])
60+
extents.push(
61+
JSON.stringify({
62+
x: extent(batch.getChild('x')),
63+
y: extent(batch.getChild('y'))
64+
})
65+
)
66+
}
5067

5168
const proxy = new ArrowProxy(batches);
5269

@@ -65,12 +82,25 @@ export function wrapArrowTable(
6582
y: extent([...(y as Iterable<number>)]),
6683
} as Rectangle;
6784

85+
86+
const tileManifest = tableFromArrays({
87+
// @ts-expect-error missing types for tableFromArrays in arrow js
88+
key: vectorFromArray(range(batches.length).map(t => tixToZxy(t).join('/')), new Utf8()),
89+
min_ix: minIx,
90+
max_ix: maxIx,
91+
nPoints: batches.map(d => d.numRows),
92+
// @ts-expect-error missing types for tableFromArrays in arrow js
93+
extent: vectorFromArray(extents, new Utf8())
94+
})
95+
6896
return new Deeptable({
6997
baseUrl: `feather://table`,
7098
plot,
7199
tileProxy: proxy,
72100
tileStructure: 'other',
73101
extent: dataExtent,
102+
// @ts-expect-error missing types for tableFromArrays in arrow js
103+
tileManifest
74104
});
75105
}
76106

tests/dataset.spec.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ test('Columns can be deleted and replaced', async () => {
2828

2929
dataset.transformations['integers'] = async function (tile) {
3030
await tile.populateManifest();
31-
return new Float32Array(tile.manifest.nPoints);
31+
return new Float32Array(tile.metadata.nPoints);
3232
};
3333

3434
dataset.deleteColumn('integers');

0 commit comments

Comments
 (0)