Index files in Dat archives with map-reduce to create queryable data views.
Not yet stable.
// in beaker
import DatArchiveMapReduce from 'dat://map-reduce.beakerbrowser.com/v/1.0.0/index.js'
// in node
const DatArchiveMapReduce = require('@beaker/dat-archive-map-reduce')
// create instance
const damr = new DatArchiveMapReduce()
// define your view
damr.define('site-posts-by-date', {
path: '/.data/unwalled.garden/posts/*.json',
map (value, meta, emit) {
let obj = JSON.parse(value)
if (isPost(obj)) {
let timestamp = Number(new Date(obj.createdAt))
emit([meta.origin, timestamp], meta.url)
}
}
})
/**
 * Check whether a parsed JSON value is a well-formed unwalled.garden post.
 *
 * @param {*} obj - The value produced by JSON.parse() on an indexed file.
 * @returns {boolean} true when `obj` has the 'unwalled.garden/post' type and
 *   non-empty string `content` and `createdAt` fields; false otherwise.
 */
function isPost (obj) {
  // JSON.parse() may return null (or a primitive) for a valid JSON file;
  // reading obj.type on null would throw inside map(), so reject it up front.
  if (!obj || typeof obj !== 'object') return false
  if (obj.type !== 'unwalled.garden/post') return false
  if (!obj.content || typeof obj.content !== 'string') return false
  if (!obj.createdAt || typeof obj.createdAt !== 'string') return false
  return true
}
// index sites
damr.index('dat://pfrazee.com', {watch: true})
damr.index('dat://mafintosh.com', {watch: true})
damr.index('dat://andrewosh.com', {watch: true})
// list the most recent 30 posts by pfrazee.com
await damr.list('site-posts-by-date', {
gt: ['dat://pfrazee.com', 0],
lt: ['dat://pfrazee.com', Infinity],
limit: 30,
reverse: true
})
// list the posts in the last 5 days by mafintosh.com
await damr.list('site-posts-by-date', {
gte: ['dat://mafintosh.com', Date.now() - ms('5d')],
lte: ['dat://mafintosh.com', Date.now()],
reverse: true
})
// reduce example
damr.define('site-posts-count', {
path: '/.data/unwalled.garden/posts/*.json',
map (value, meta, emit) {
let obj = JSON.parse(value)
if (isPost(obj)) {
emit(meta.origin, meta.pathname)
}
},
reduce (acc, value, key) {
return (acc||0) + 1
}
})
await damr.get('site-posts-count', 'dat://pfrazee.com')

Table of Contents generated with DocToc
- Class: DatArchiveMapReduce
- Instance: DatArchiveMapReduce
- damr.open()
- damr.close()
- damr.destroy()
- damr.define(name, definition)
- damr.reset(view)
- damr.get(view, key)
- damr.list(view, opts)
- damr.index(url[, opts])
- damr.unindex(url)
- damr.indexFile(archive, filepath)
- damr.indexFile(url)
- damr.unindexFile(archive, filepath)
- damr.unindexFile(url)
- damr.listIndexed()
- damr.isIndexed(url)
- Event: 'open'
- Event: 'open-failed'
- Event: 'view-reset'
- Event: 'archive-indexing'
- Event: 'archive-index-progress'
- Event: 'archive-indexed'
- Event: 'archive-missing'
- Event: 'archive-found'
- Event: 'archive-error'
var damr = new DatArchiveMapReduce('views')
- `name` String. Defaults to `'views'`. If run in the browser, this will be the name of the IndexedDB instance. If run in NodeJS, this will be the path of the LevelDB folder.
- `opts` Object.
  - `DatArchive` Constructor. The class constructor for dat archive instances. If in node, you should specify node-dat-archive.
Create a new DatArchiveMapReduce instance.
The given name will control where the indexes are saved.
You can specify different names to run multiple DatArchiveMapReduce instances at once.
await damr.open()
- Returns Promise<Void>.
Opens the internal databases. Will be called automatically by other methods, so you usually don't need to call this method.
await damr.close()
- Returns Promise<Void>.
Closes the DatArchiveMapReduce instance.
await damr.destroy()
- Returns Promise<Void>.
Closes and deletes all indexes in the DatArchiveMapReduce instance.
You can .destroy() and then .open() a DatArchiveMapReduce to recreate its indexes.
await damr.destroy()
await damr.open()

- `name` String. The name of the view.
- `definition` Object.
  - `path` String or Array<String>. An anymatch list of files to index.
  - `map` Function(value, meta, emit). A method to accept a new or changed file and emit new stored entries in the view.
    - `value` String.
    - `meta` Object.
      - `url` String. The URL of the file (eg 'dat://foo.com/bar.json').
      - `origin` String. The origin of the file's site (eg 'dat://foo.com').
      - `pathname` String. The path of the file in the site (eg '/bar.json').
    - `emit` Function(key, value). Call this to emit new mapped values.
      - `key` String or Array<String>. The key to store the new entry at.
      - `value` Any. The value to store for the entry.
  - `reduce` Function(agg, value, key). A method to aggregate mapped entries into a single value.
    - `agg` Any. The current value of the reduce method's output.
    - `value` Any. The next mapped value to process.
    - `key` Any. The key of the entry being processed.
    - Must return the current value of the reduced entry.
- Returns Promise<Void>.
Creates a new view on the damr object.
Example:
// create a view that counts the number of posts by each user
damr.define('site-posts-count', {
path: '/.data/unwalled.garden/posts/*.json',
map (value, meta, emit) {
let obj = JSON.parse(value)
if (isPost(obj)) {
emit(meta.origin, meta.pathname)
}
},
reduce (acc, value, key) {
return (acc||0) + 1
}
})
// get the number of posts by dat://pfrazee.com
await damr.index('dat://pfrazee.com')
await damr.get('site-posts-count', 'dat://pfrazee.com')

await damr.reset('site-posts-by-date')
- `view` String. The name of the view to reset.
Clears all data indexed in the view. This should be used when the view-definition has changed and needs to be rebuilt.
// get the post by pfrazee.com that was created at "Tue, 23 Jul 2019 18:23:57 GMT"
var post = await damr.get('site-posts-by-date', ['dat://pfrazee.com', Number(new Date('Tue, 23 Jul 2019 18:23:57 GMT'))])
- `view` String. The name of the view to query.
- `key` Any. The key of the entry to fetch.
- Returns Promise<Object>.
  - `key` Any. The key of the entry.
  - `value` Any. The value of the entry.
Get the entry at the given key.
// list the most recent 30 posts by pfrazee.com
await damr.list('site-posts-by-date', {
gte: ['dat://pfrazee.com', 0],
lte: ['dat://pfrazee.com', Infinity],
limit: 30,
reverse: true
})
// list the posts in the last 5 days by mafintosh.com
await damr.list('site-posts-by-date', {
gte: ['dat://mafintosh.com', Date.now() - ms('5d')],
lte: ['dat://mafintosh.com', Date.now()],
reverse: true
})
- `view` String. The name of the view to query.
- `opts` Object.
  - `gt` Any. The start key in the range to query (exclusive).
  - `gte` Any. The start key in the range to query (inclusive).
  - `lt` Any. The end key in the range to query (exclusive).
  - `lte` Any. The end key in the range to query (inclusive).
  - `reverse` Boolean. Reverse the order of the output? Defaults to false.
  - `limit` Number. Limit the number of entries returned. Defaults to no limit.
- Returns Promise<Array<Object>>.
  - `key` Any. The key of the entry.
  - `value` Any. The value of the entry.
List a range of entries from a view.
await damr.index('dat://foo.com')
- `url` String or DatArchive. The site to index.
- `opts` Object.
  - `watch` Boolean. Should DatArchiveMapReduce watch the archive for changes, and index them immediately? Defaults to false.
- Returns Promise<Void>.
Add a dat:// site to be indexed. The method will return when the site has been fully indexed.
await damr.unindex('dat://foo.com')
- `url` String or DatArchive. The site to deindex.
- Returns Promise<Void>.
Remove a dat:// site from the dataset. The method will return when the site has been fully de-indexed.
await damr.indexFile(fooArchive, '/bar.json')
- `archive` DatArchive. The site containing the file to index.
- `filepath` String. The path of the file to index.
- Returns Promise<Void>.
Add a single file to the dataset. The method will return when the file has been indexed.
This will not add the file or its archive to the list returned by listIndexed().
DatArchiveMapReduce will not watch the file after this call.
await damr.indexFile('dat://foo.com/bar.json')
- `url` String. The url of the file to index.
- Returns Promise<Void>.
Add a single file to the dataset. The method will return when the file has been indexed.
This will not add the file or its archive to the list returned by listIndexed().
DatArchiveMapReduce will not watch the file after this call.
await damr.unindexFile(fooArchive, '/bar.json')
- `archive` DatArchive. The site containing the file to deindex.
- `filepath` String. The path of the file to deindex.
- Returns Promise<Void>.
Remove a single file from the dataset. The method will return when the file has been de-indexed.
await damr.unindexFile('dat://foo.com/bar.json')
- `url` String. The url of the file to deindex.
- Returns Promise<Void>.
Remove a single file from the dataset. The method will return when the file has been de-indexed.
var urls = await damr.listIndexed()
- Returns Array<String>.
Lists the URLs of the dat:// sites which are included in the dataset.
var yesno = await damr.isIndexed('dat://foo.com')
- Returns Boolean.
Is the given dat:// URL included in the dataset?
damr.on('open', () => {
console.log('DatArchiveMapReduce is ready for use')
})
Emitted when the DatArchiveMapReduce instance has been opened using open().
damr.on('open-failed', (err) => {
console.log('DatArchiveMapReduce failed to open', err)
})
- `error` Error.
Emitted when the DatArchiveMapReduce instance fails to open during open().
damr.on('view-reset', ({view}) => {
console.log('DatArchiveMapReduce has reset the indexes for', view)
})
- `view` String. The name of the view that was reset.
Emitted when reset() has been called on a view. All map/reduced entries are cleared for the view.
damr.on('archive-indexing', ({view, origin, start, end}) => {
console.log(view, 'is updating for', origin, 'from version', start, 'to', end)
})
- `view` String. The view that is indexing.
- `origin` String. The archive that was updated.
- `start` Number. The version which is being indexed from.
- `end` Number. The version which is being indexed to.
Emitted when the DatArchiveMapReduce instance has started to index the given archive.
damr.on('archive-index-progress', ({view, origin, current, total}) => {
console.log(view, 'update for', origin, 'is', Math.round(current / total * 100), '% complete')
})
- `view` String. The view that is indexing.
- `origin` String. The archive that was updated.
- `current` Number. The current update being applied.
- `total` Number. The total number of updates being applied.
Emitted when an update has been applied during an indexing process.
damr.on('archive-indexed', ({view, origin, version}) => {
console.log(view, 'was updated for', origin, 'at version', version)
})
- `view` String. The view that is indexing.
- `origin` String. The archive that was updated.
- `version` Number. The version which was updated to.
Emitted when the DatArchiveMapReduce instance has indexed the given archive.
This is similar to 'view-updated', but it fires every time an archive is indexed, whether or not it results in updates to the indexes.
damr.on('archive-missing', ({origin}) => {
console.log('DatArchiveMapReduce couldnt find', origin, '- now searching')
})
- `origin` String. The archive that is missing.
Emitted when an archive's data was not locally available or found on the network.
When this occurs, DatArchiveMapReduce will continue searching for the data, and emit 'archive-found' on success.
damr.on('archive-found', ({origin}) => {
console.log('DatArchiveMapReduce has found and indexed', origin)
})
- `origin` String. The archive that was found.
Emitted when an archive's data was found after originally not being found during indexing.
This event will only be emitted after 'archive-missing' is emitted.
damr.on('archive-error', ({origin, error}) => {
console.log('DatArchiveMapReduce failed to index', origin, error)
})
- `origin` String. The archive that failed.
- `error` Error. The error emitted.
Emitted when an archive fails to load.