diff --git a/build-files.txt b/build-files.txt index 6075c56c22..9828987e93 100644 --- a/build-files.txt +++ b/build-files.txt @@ -18,7 +18,14 @@ source/dub/generators/sublimetext.d source/dub/generators/targetdescription.d source/dub/generators/visuald.d source/dub/init.d +source/dub/index/bitbucket.d +source/dub/index/client.d +source/dub/index/data.d +source/dub/index/github.d +source/dub/index/gitlab.d +source/dub/index/utils.d source/dub/internal/configy/attributes.d +source/dub/internal/configy/backend/json.d source/dub/internal/configy/backend/node.d source/dub/internal/configy/backend/yaml.d source/dub/internal/configy/easy.d @@ -82,6 +89,7 @@ source/dub/packagemanager.d source/dub/packagesupplier.d source/dub/packagesuppliers/fallback.d source/dub/packagesuppliers/filesystem.d +source/dub/packagesuppliers/index.d source/dub/packagesuppliers/maven.d source/dub/packagesuppliers/package.d source/dub/packagesuppliers/packagesupplier.d diff --git a/source/dub/commandline.d b/source/dub/commandline.d index dc2bf49027..bcc9428594 100644 --- a/source/dub/commandline.d +++ b/source/dub/commandline.d @@ -73,7 +73,10 @@ CommandGroup[] getCommands() @safe pure nothrow new ListOverridesCommand, new CleanCachesCommand, new ConvertCommand, - ) + // This is index management but those commands are hidden + new IndexBuildCommand, + new IndexFromRegistryCommand, + ), ]; } @@ -273,7 +276,8 @@ unittest { assert(handler.commandNames == ["init", "run", "build", "test", "lint", "generate", "describe", "clean", "dustmite", "fetch", "add", "remove", "upgrade", "add-path", "remove-path", "add-local", "remove-local", "list", "search", - "add-override", "remove-override", "list-overrides", "clean-caches", "convert"]); + "add-override", "remove-override", "list-overrides", "clean-caches", "convert", + "index-build", "index-fromregistry"]); } /// It sets the cwd as root_path by default @@ -2983,6 +2987,255 @@ class ConvertCommand : Command { } 
+/******************************************************************************/ +/* Index management +/******************************************************************************/ + +public class IndexBuildCommand : Command { + import dub.index.bitbucket; + import dub.index.data; + import dub.index.github; + import dub.index.gitlab; + import dub.index.utils; + import std.random; + import std.range; + + /// Index file to use + private string index = "index.yaml"; + /// Filename to write to + private string output = "index-build-result"; + /// Packages to filter in - assume all if empty + private string[] include; + /// Packages to filter out + private string[] exclude; + /// Bearer token to use to authenticate requests + private string githubToken, gitlabToken, bitbucketToken; + /// Kind of packages to include (default: all kinds) + private string[] kind; + /// Whether to force the iteration of tags or not + /// This needs to be used if some tags need to be reprocessed + private bool force_tags; + /// Force the package to be entirely reprocessed. Imply `--force-tags`. + private bool force; + /// Whether to use a randomized sample of packages + private bool random; + /// The number of packages to update + private uint maxUpdates = uint.max; + /// Source and target index to use, mutually exclusive with `random` + private uint fromIdx = 0, toIdx = uint.max; + + this() @safe pure nothrow + { + this.name = "index-build"; + this.description = "Generate the rich index from the index.yaml file"; + this.helpText = [ "This command is for internal use only. Do not use it." 
]; + this.hidden = true; + } + + override void prepare(scope CommandArgs args) { + args.getopt("bitbucket-token", &this.bitbucketToken, ["Bearer token to use when issuing Bitbucket requests"]); + args.getopt("github-token", &this.githubToken, ["Bearer token to use when issuing Github requests"]); + args.getopt("gitlab-token", &this.gitlabToken, ["Bearer token to use when issuing GitLab requests"]); + args.getopt("output", &this.output, ["Where to output the data (path to a folder)"]); + args.getopt("index", &this.index, ["Index file to use - default to 'index.yaml'"]); + args.getopt("include", &this.include, ["Which packages to filter in - if not, assume all"]); + args.getopt("exclude", &this.exclude, ["Which packages to filter out - if not, assume none"]); + args.getopt("kind", &this.kind, ["Kind of packages to include (github, gitlab, bitbucket). Default: all"]); + args.getopt("force", &this.force, ["Force Dub to reprocess packages even if it has cache informations"]); + args.getopt("force-tags", &this.force_tags, + ["Force Dub to re-list tags, but do not reload the recipe if the commit hasn't changed."]); + args.getopt("random", &this.random, ["Randomize the order in which packages are processed"]); + args.getopt("max-updates", &this.maxUpdates, ["Maximum number of packages to process"]); + args.getopt("from", &this.fromIdx, ["Index to seek to before iterating the list of packages (default: 0)"]); + args.getopt("to", &this.toIdx, ["Index to stop at when iterating the list of packages (default: end of list)"]); + + enforce(this.fromIdx <= this.toIdx, "Cannot have source index (`--from`) be past end index (`--to`)"); + enforce(this.fromIdx == 0 || !this.random, + "Cannot specify source index (`--from`) for random sampling (`--random`)"); + enforce(this.toIdx == uint.max || !this.random, + "Cannot specify end index (`--to`) for random sampling (`--random`)"); + } + + override int execute(Dub dub, string[] free_args, string[] app_args) + { + import dub.index.client 
: RepositoryClient; + import dub.index.data; + import dub.internal.configy.easy; + static import std.file; + import std.typecons; + + enforceUsage(free_args.length == 0, "Expected no free argument."); + enforceUsage(app_args.length == 0, "Expected zero application arguments."); + + const isUpdate = std.file.exists(this.output); + if (isUpdate) + enforce(std.file.isDir(this.output), this.output ~ ": is not a directory"); + else + std.file.mkdirRecurse(this.output); + + auto indexN = parseConfigFileSimple!PackageList(this.index); + if (indexN.isNull()) return 1; + auto indexC = indexN.get(); + logInfoNoTag("Found %s packages in the index file", indexC.packages.length); + + const NativePath outputPath = NativePath(this.output); + size_t processed, updated, notsupported; + string[] included, excluded, errored; + scope gh = new GithubClient(this.githubToken); + scope gl = new GitLabClient(this.gitlabToken); + scope bb = new BitbucketClient(this.bitbucketToken); + + void update (scope RepositoryClient client, in PackageEntry pkg) { + const target = getPackageDescriptionPath(outputPath, PackageName(pkg.name.value)); + ensureDirectory(target.parentPath()); + const targetStr = target.toNativeString(); + auto previous = !this.force && std.file.exists(targetStr) ? 
+ parseConfigFileSimple!(IndexedPackage!0)(targetStr, StrictMode.Ignore) : + Nullable!(IndexedPackage!0).init; + if (this.force_tags && !previous.isNull()) + previous.get().cache = CacheInfo.init; + auto res = updateDescription(client, pkg, previous); + if (previous.isNull() || previous.get() != res) { + std.file.write(targetStr, res.serializeToJsonString()); + ++updated; + } + } + + // Update a single package - this code is in its own function as it + // is wrapped in a try-catch in the `foreach` to process as many packages + // as possible + void updatePackageIndex (in PackageEntry pkg) { + logInfo("[%s] Processing included package", pkg.name.value); + switch (pkg.source.kind) { + case `github`: + scope client = gh.new Repository(pkg.source.owner, pkg.source.project); + update(client, pkg); + break; + case `gitlab`: + scope client = gl.new Project(pkg.source.owner, pkg.source.project); + update(client, pkg); + break; + case `bitbucket`: + scope client = bb.new Repository(pkg.source.owner, pkg.source.project); + update(client, pkg); + break; + default: + throw new Exception("Package kind not supported: " ~ pkg.source.kind); + } + } + + int processEntry (size_t idx, ref PackageEntry pkg) { + if (updated >= this.maxUpdates) return 1; + if (this.include.length && !this.include.canFind(pkg.name.value)) + return 0; + if (this.exclude.canFind(pkg.name.value)) { + excluded ~= pkg.name.value; + return 0; + } + if (this.kind.length && !this.kind.canFind(pkg.source.kind)) + return 0; + + ++processed; + try + updatePackageIndex(pkg); + catch (Exception exc) { + errored ~= pkg.name.value; + // If we get a 404 here, it might be a dead package + logError("[%s] Could not build index for package: %s", + pkg.name.value, exc.message()); + } + + if (this.include.length) { + included ~= pkg.name.value; + if (included.length % 10 == 0) { + const rl = gh.getRateLimit(); + logDebug("Requests still available: %s/%s", rl.remaining, rl.limit); + } + } + else if (idx % 10 == 0) { + const rl 
= gh.getRateLimit(); + logDebug("Requests still available: %s/%s", rl.remaining, rl.limit); + } + return 0; + } + + if (this.random) { // Can't use `std.random : choose` because bugs + foreach (idx, pkg; indexC.packages.randomCover().enumerate) + if (processEntry(idx, pkg)) + break; + } else { + const startIdx = min(this.fromIdx, indexC.packages.length); + const endIdx = min(this.toIdx, indexC.packages.length); + foreach (idx, pkg; indexC.packages[startIdx .. endIdx]) + if (processEntry(idx, pkg)) + break; + } + + logInfoNoTag("Updated %s packages out of %s processed (%s excluded, %s errors, %s not supported)", + updated, processed, excluded.length, errored.length, notsupported); + if (this.include.length && included != this.include) + logWarn("Not all explicitly-included packages have been processed!"); + if (errored.length) + logWarn("The following packages errored out:\n%(\t- %s\n%)", errored); + if (!this.kind.length || this.kind.canFind(`github`)) { + const rl = gh.getRateLimit(); + logInfoNoTag("Github requests still available: %s/%s", rl.remaining, rl.limit); + } + return 0; + } +} + +public class IndexFromRegistryCommand : Command { + /// Filename to write to + private string output = "index.yaml"; + /// Bypass cache, always query the registry + private bool force; + + this() @safe pure nothrow + { + this.name = "index-fromregistry"; + this.description = "Generate the index.yaml file from the remote registry"; + this.helpText = [ "This command is for internal use only. Do not use it." 
]; + this.hidden = true; + } + + override void prepare(scope CommandArgs args) { + args.getopt("O", &this.output, ["Where to output the data ('-' is supported)"]); + args.getopt("f", &this.force, ["Bypass the cache and always query the registry"]); + } + + override int execute(Dub dub, string[] free_args, string[] app_args) + { + import dub.internal.vibecompat.inet.url; + import dub.packagesuppliers.registry; + import std.format; + + enforceUsage(free_args.length == 0, "Expected zero arguments."); + enforceUsage(app_args.length == 0, "Expected zero application arguments."); + + scope registry = new RegistryPackageSupplier(URL(defaultRegistryURLs[1])); // [1] is the code.dlang.org registry; [0] is the index URL itself + scope allPkgs = registry.getPackageDump(this.force); + stderr.writeln("Found ", allPkgs.array.length, " packages"); // Status goes to stderr: stdout may carry the YAML itself ('-O -') + scope output = this.output == "-" ? stdout : File(this.output, "w+"); + scope writer = output.lockingTextWriter(); + writer.formattedWrite("packages:\n"); + foreach (pkg; allPkgs.array) { + writer.formattedWrite(` %s: + source: + kind: %s + owner: %s + project: %s +`, + pkg["name"].opt!string, pkg["repository"]["kind"].opt!string, + pkg["repository"]["owner"].opt!string, + pkg["repository"]["project"].opt!string); + } + + return 0; + } +} + + /******************************************************************************/ /* HELP */ /******************************************************************************/ diff --git a/source/dub/dub.d b/source/dub/dub.d index 11cf840b82..89df319e0b 100644 --- a/source/dub/dub.d +++ b/source/dub/dub.d @@ -47,6 +47,7 @@ deprecated("use defaultRegistryURLs") enum defaultRegistryURL = defaultRegistryU /// The URL to the official package registry and it's default fallback registries. 
static immutable string[] defaultRegistryURLs = [ + "dub+index+https://github.com/dlang/dub-index.git", "https://code.dlang.org/", "https://codemirror.dlang.org/" ]; @@ -413,6 +414,12 @@ class Dub { switch (url.startsWith("dub+", "mvn+", "file://")) { case 1: + // `startsWith` takes the shortest match so if we provide `dub+` + // `and `dub+index+` it will always match the former... + if (url.startsWith(`dub+index+`)) { + return new IndexPackageSupplier(URL(url["dub+index+".length .. $]), + this.m_dirs.userPackages ~ "index"); + } return new RegistryPackageSupplier(URL(url[4..$])); case 2: return new MavenRegistryPackageSupplier(URL(url[4..$])); @@ -1291,7 +1298,7 @@ class Dub { try results ~= tuple(ps.description, ps.searchPackages(query)); catch (Exception e) { - logWarn("Searching %s for '%s' failed: %s", ps.description, query, e.msg); + logWarn("Searching %s for '%s' failed: %s", ps.description, query, e); } } return results.filter!(tup => tup[1].length); diff --git a/source/dub/index/bitbucket.d b/source/dub/index/bitbucket.d new file mode 100644 index 0000000000..a4719d92dc --- /dev/null +++ b/source/dub/index/bitbucket.d @@ -0,0 +1,165 @@ +/******************************************************************************* + + Types and functions to interface with the Bitbucket API to generate an index + entry + + See_Also: + https://developer.atlassian.com/cloud/bitbucket/rest/intro/ + +*******************************************************************************/ + +module dub.index.bitbucket; + +import dub.dependency; +import dub.index.client; +import dub.index.data; +import dub.index.utils; +import dub.internal.utils; +import dub.internal.logging; +import dub.internal.vibecompat.data.json; +import dub.internal.vibecompat.inet.path; +import dub.internal.vibecompat.inet.url; +import dub.internal.vibecompat.inet.urlencode; +import dub.package_ : packageInfoFiles; +import dub.recipe.io; +import dub.recipe.packagerecipe; + +import std.algorithm; +import 
std.exception; +import std.format; +import std.range; +import std.typecons; + +/** + * Client implementation for Bitbucket + */ +public class BitbucketClient : APIClient { + /** + * Construct an instance of this Bitbucket client + * + * Params: + * url = URL of the Bitbucket API to use, defaults to the cloud one + */ + public this (string token = null, string url = `https://api.bitbucket.org/2.0/`) { + super(URL(url), token); + } + + /** + * Scoped client for repository-specific interaction + */ + public class Repository : RepositoryClient { + /// Path to the repository + private InetPath path; + + /** + * Construct a client scoped to the `repositories/` path of `owner` and `project` + */ + public this (string owner, string project) @safe { + this.path = InetPath("repositories/") ~ owner ~ project; + } + + /// https://developer.atlassian.com/cloud/bitbucket/rest/api-group-repositories/#api-repositories-workspace-repo-slug-get + public override string getDescription (CacheInfo cache = CacheInfo.init) { + return this.outer.get(this.path, cache).result["description"].opt!string; + } + + /** + * Get all the tags for this repository. + * + * Returns: + * A result with `notModified` set and no tags if there was a cache hit, + * the list of tags otherwise. 
+ * + * See_Also: + * https://developer.atlassian.com/cloud/bitbucket/rest/api-group-refs/#api-repositories-workspace-repo-slug-refs-tags-get + */ + public override RequestResult!(TagDescription[]) getTags (CacheInfo cache) { + static TagDescription[] jsonToTag (Json[] data) { + return data.filter!(t => t["target"]["type"].opt!string == "commit") + .map!(t => TagDescription(t["name"].opt!string, t["target"]["hash"].opt!string)) + .array; + } + + TagDescription[] getTagsInternal (string url) { + if (!url.length) return null; + auto res = this.outer.get(URL(url)); + // Follow-up pages have the same shape as the first one: tags are under `values` and the link under `next` + auto page = res.result.parseJson(); + return jsonToTag(page["values"].opt!(Json[])) ~ + getTagsInternal(page["next"].opt!string); + } + + auto res = this.outer.get(this.path ~ "refs" ~ "tags", cache); + if (res.notModified) return res.convert!(TagDescription[])(null); + return res.convert(jsonToTag(res.result["values"].opt!(Json[])) ~ + getTagsInternal(res.result["next"].opt!string)); + } + + /// Implementation of findRecipe + public override RequestResult!PackageRecipe findRecipe ( + ref InetPath path, string reference, CacheInfo cache) { + auto localPath = path; + if (!packageInfoFiles.filter!(pif => path.head == pif.filename).empty) { + try + return this.getRecipe(path, reference, cache); + catch (Exception exc) + localPath = path.hasParentPath() ? path.parentPath() : InetPath(`/`); + } + + // We don't cache `findRecipeSummary`, and if we reached this branch + // our caching information is useless anyway as we didn't have the + // right path. + auto res = this.findRecipeSummary(reference, CacheInfo.init, localPath); + enforce(res.result != Json.emptyObject, + "Could not find recipe file at '%s' in repository".format(path)); + path = InetPath(res.result["path"].opt!string); + return this.getRecipe(path, reference, CacheInfo.init); + } + + /** + * Get a PackageRecipe from Bitbucket + * + * Unlike Github or GitLab, Bitbucket returns the raw file by default. 
+ * + * See_Also: + * https://developer.atlassian.com/cloud/bitbucket/rest/api-group-source/#api-repositories-workspace-repo-slug-src-commit-path-get + */ + public RequestResult!PackageRecipe getRecipe (InetPath path, string reference, + CacheInfo cache) { + auto url = this.outer.url ~ (this.path ~ "src" ~ reference ~ path); + auto res = this.outer.get(url, cache.etag, cache.last_modified); + if (res.notModified) + return res.convert(PackageRecipe.init); + return res.convert(parsePackageRecipe(res.result.idup, path.toString())); + } + + /** + * Find a recipe in a directory: returns the first listed entry whose base name is a known package file, or `Json.emptyObject` + */ + public RequestResult!Json findRecipeSummary ( + string reference, CacheInfo cache, InetPath where) { + import std.path : baseName; + + if (where.absolute) { + auto segments = where.bySegment(); + segments.popFront(); + where = InetPath(segments); + assert(!where.absolute); + } + + auto path = (this.path ~ "src" ~ reference ~ where); + path.endsWithSlash = true; + // TODO: We don't handle pagination here. Perhaps use Bitbucket + // filtering language to make sure we get only the `dub` files ? + scope res = this.outer.get(path, cache); + if (res.notModified) return res.convert(Json.emptyObject); + auto dir = res.result["values"].opt!(Json[]); + foreach (info; packageInfoFiles) { + auto resrng = dir.find!(entry => entry["path"].opt!string.baseName == info.filename); + if (!resrng.empty) + return res.convert(resrng.front); + } + return res.convert(Json.emptyObject); + } + } +} diff --git a/source/dub/index/client.d b/source/dub/index/client.d new file mode 100644 index 0000000000..f87e763c8b --- /dev/null +++ b/source/dub/index/client.d @@ -0,0 +1,342 @@ +/******************************************************************************* + + Base class for API clients + + API clients need to be able to provide an authentication token, and to save + rate limits. They use cURL under the hood. This exposes a base class + for them to use. 
+ +*******************************************************************************/ + +module dub.index.client; + +import dub.index.data; +import dub.internal.vibecompat.data.json; +import dub.internal.vibecompat.inet.path; +import dub.internal.vibecompat.inet.url; + +import std.algorithm.searching; +import std.algorithm.iteration; +import std.array; +import std.conv; +import std.datetime; +import std.encoding : EncodingScheme; +import std.exception; +import std.format; +import std.net.curl; +import std.regex : matchAll; +import std.string : indexOf; +import std.typecons; +import std.uni; + +/** + * Base class that specific API clients derive from + */ +package class APIClient { + /// The connection itself + version (DubUseCurl) protected HTTP connection; + /// Optional token to authenticate requests + protected string token; + /// Base URL of the API, as passed to the constructor + protected URL url; + /// Rate limit information from the last query + protected RateLimit rate_limit; + + /** + * Construct an instance of this client + * + * Params: + * url = URL of the API to use. + * token = Optional token to use to authenticate requests. + */ + public this (URL url, string token = null) { + this.url = url; + this.token = token; + version (DubUseCurl) { + this.connection = HTTP(url.toString()); + import dub.internal.utils : setupHTTPClient; + setupHTTPClient(this.connection, 8 /* seconds */); + } + } + + /** + * Get the latest known rate limit + * + * The return of the function is the latest known rate-limit, + * based on the last time a query was performed. If no query has been + * performed, all fields hold their initial value. + */ + public RateLimit getRateLimit () const scope @safe pure nothrow @nogc { + return this.rate_limit; + } + + /** + * Perform an HTTP GET request and return the result as JSON + * + * This is a thin wrapper around the other `get` overload to return + * Json, or an empty object if there is a cache match. 
+ * + * Params: + * path = Path that is added to `this.url` to form the target + * cache = Optional cache information + * query = Optional query string + * + * Returns: + * A request result that is set to `Json.emptyObject` if there was + * a cache hit, or the data if the request was successful. + * + * Throws: + * As the other `get` overload (in case of error). + */ + public RequestResult!Json get (InetPath path, CacheInfo cache, string query = null) { + auto url = (this.url ~ path); + url.normalize(path.endsWithSlash); + url.queryString = query; + auto res = this.get!char(url, cache.etag, cache.last_modified); + return res.convert(res.notModified ? Json.emptyObject : res.result.parseJson()); + } + + /** + * Perform an HTTP GET request and returns the result, using etags if possible + * + * This behaves similarly to `std.net.curl.get`, but in addition handles + * authorization, Etag, and saves the rate limit values to `this.rate_limit`. + * + * Params: + * T = Encoding type, defaults to `char`, `ubyte` can also be used. + * url = The URL to request + * etag = Optional etag value to use for `if-none-match` value, allowing + * us to issue conditional requests which do not use up our API + * quota. + * last_modified = Used for caching via `if-modified-since` header. + * If etag is also provided, both will be used, + * however servers tend to prioritize etags. + * + * Returns: + * The resulting data, or null if a 304 was returned (cache hit). + * + * Throws: + * If status code different from 2xx or 304 is returned. 
+ */ + version (DubUseCurl) + public RequestResult!(T[]) get (T = char) (URL url, + string etag = null, string last_modified = null) { + + auto content = appender!(ubyte[])(); + HTTP.StatusLine statusLine; + CacheInfo cache; + string charset = "utf-8", next; + + if (this.token.length) + this.connection.addRequestHeader("authorization", "Bearer " ~ token); + if (etag.length) + this.connection.addRequestHeader("if-none-match", etag); + if (last_modified.length) + this.connection.addRequestHeader("if-modified-since", last_modified); + scope (exit) this.connection.clearRequestHeaders(); + + this.connection.onReceiveHeader = (in char[] key, in char[] value) { + // Required for things to work + if (!icmp(key, "content-length")) + content.reserve(value.to!size_t); + else if (!icmp(key, "content-type")) { + auto io = indexOf(value, "charset=", No.caseSensitive); + if (io != -1) + charset = value[io + "charset=".length .. $].findSplit(";")[0].idup; + } + + // Pagination + else if (!icmp(key, "link")) { + foreach (lnk; value.splitter(",")) { + auto matches = matchAll(lnk, `^\s*<([^>]*)>;\s*rel="(.*)"$`); + // If we have a match, first one is the whole string, then + // the URL, then the `rel` + if (matches.empty) continue; + if (matches.front.length != 3) continue; + if (matches.front[2] != "next") continue; + next = matches.front[1].idup; + break; + } + } + + // Caching + else if (!icmp(key, "etag")) + cache.etag = value.idup; + else if (!icmp(key, "last-modified")) + cache.last_modified = value.idup; + + // Handle rate limiting + else if (!icmp(key, "x-ratelimit-limit")) + this.rate_limit.limit = value.to!size_t; + else if (!icmp(key, "x-ratelimit-remaining")) + this.rate_limit.remaining = value.to!size_t; + else if (!icmp(key, "x-ratelimit-used")) + this.rate_limit.used = value.to!size_t; + else if (!icmp(key, "x-ratelimit-reset")) + this.rate_limit.reset = SysTime.fromUnixTime(value.to!long, UTC()); + + }; + this.connection.onReceive = (ubyte[] data) { + content ~= data; 
+ return data.length; + }; + + this.connection.onReceiveStatusLine = (HTTP.StatusLine l) { statusLine = l; }; + this.connection.url = url.toString(); + this.connection.perform(); + if (statusLine.code == 304) + return typeof(return)(true, null, cache, next); // Cache hit + enforce(statusLine.code / 100 == 2, new HTTPStatusException(statusLine.code, + format("HTTP request returned status code %d (%s)", statusLine.code, statusLine.reason))); + + return typeof(return)(false, _decodeContent!T(content.data, charset), cache, next); + } + else + public RequestResult!(T[]) get (T = char) (URL url, + string etag = null, string last_modified = null) { + assert(0, "Need `DubUseCurl` to be able to use index client"); + } +} + +/** + * Represent the functionality a single repository implements + */ +package(dub) interface RepositoryClient { + import dub.recipe.packagerecipe; + + /** + * Get the project description as filled in the provider + * + * This can be used as a fallback if the recipe file does not + * contain a description. + */ + public string getDescription (CacheInfo cache); + + /** + * Get all the tags for this repository. + * + * Returns: + * A `null` `Nullable` if there was a cache hit, the list of tags + * otherwise. + * + * See_Also: + * https://docs.github.com/en/rest/repos/repos?apiVersion=2022-11-28#list-repository-tags + */ + public RequestResult!(TagDescription[]) getTags (CacheInfo cache); + + /** + * Get the deserialized `PackageRecipe` + * + * This finds the package recipe in the repository, then fetches and + * deserializes it. + * + * Params: + * path = This can be either where to look for the package file, + * or if the path is to a package file itself (ends with one + * of the recognized package name), where to look it up. + * If the package file is not found, this will fall back + * to calling `findRecipeSummary`. + * If the recipe path doesn't match, this value will be + * changed to match the actual path. 
+ * reference = The reference at which the file is looked up, e.g. + * `master` or a tag such as `v1.0.0`. + * cache = Cache information about this recipe file, if any. + */ + public RequestResult!PackageRecipe findRecipe ( + ref InetPath path, string reference, CacheInfo cache); +} + +/// Simple wrapper for a request result +package struct RequestResult (T) { + /// Whether or not there was a cache hit + public bool notModified; + + /// The data, may be null if there was a cache hit + public T result; + + /// Cache information + public CacheInfo cache; + + /// Link information (for paginated result) + public string next; + + /// Convenience function for conversions: keeps `notModified`, `cache` and `next`, replaces the payload with `value` + public RequestResult!OT convert (OT) (OT value = OT.init) { + return typeof(return)(this.notModified, value, this.cache, this.next); + } +} + +/// https://docs.github.com/en/rest/rate-limit/rate-limit?apiVersion=2022-11-28 +/// https://docs.github.com/en/rest/using-the-rest-api/rate-limits-for-the-rest-api?apiVersion=2022-11-28#checking-the-status-of-your-rate-limit +public struct RateLimit { + /// `x-ratelimit-limit` + /// The maximum number of requests that you can make per hour + public size_t limit; + /// x-ratelimit-remaining + /// The number of requests remaining in the current rate limit window + public size_t remaining; + /// x-ratelimit-used + /// The number of requests you have made in the current rate limit window + public size_t used; + /// x-ratelimit-reset + /// The time at which the current rate limit window resets, in UTC epoch seconds + public SysTime reset; +} + +/// Wrapper for tag information +public struct TagDescription { + /// Name of this tag + public string name; + + /// Commit SHA this refers to + public string commit; +} + +// Taken from `std.net.curl` +private auto _decodeContent (T) (ubyte[] content, string encoding) +{ + static if (is(T == ubyte)) + return content; + else + { + import std.exception : enforce; + import std.format : format; + import std.uni : icmp; + 
+ // Optimally just return the utf8 encoded content + if (icmp(encoding, "UTF-8") == 0) + return cast(char[])(content); + + // The content has to be re-encoded to utf8 + auto scheme = EncodingScheme.create(encoding); + enforce!CurlException(scheme !is null, + format("Unknown encoding '%s'", encoding)); + + auto strInfo = decodeString(content, scheme); + enforce!CurlException(strInfo[0] != size_t.max, + format("Invalid encoding sequence for encoding '%s'", encoding)); + + return strInfo[1]; + } +} + +// Taken from `std.net.curl` +private Tuple!(size_t,Char[]) decodeString(Char = char)(const(ubyte)[] data, + EncodingScheme scheme, size_t maxChars = size_t.max) +{ + import std.encoding : INVALID_SEQUENCE; + Char[] res; + immutable startLen = data.length; + size_t charsDecoded = 0; + while (data.length && charsDecoded < maxChars) + { + immutable dchar dc = scheme.safeDecode(data); + if (dc == INVALID_SEQUENCE) + { + return typeof(return)(size_t.max, cast(Char[]) null); + } + charsDecoded++; + res ~= dc; + } + return typeof(return)(startLen-data.length, res); +} diff --git a/source/dub/index/data.d b/source/dub/index/data.d new file mode 100644 index 0000000000..0a2d5ab4ac --- /dev/null +++ b/source/dub/index/data.d @@ -0,0 +1,228 @@ +/******************************************************************************* + + Defines the configuration file format for Dub's index + + The index is a replacement for the registry that relies entirely on the + state of a Git repository, using a similar approach as Nix packages, Rust's + cargo, and Homebrew. + + As Dub needs data that would otherwise force users to duplicate the content + of their recipe file, leading to sub-par experience, the package + registration process is simply to add a description for the package in + a configuration file, that is then processed by a special dub command to + generate the index. This also allows us to extend the index in the future + to support more data. 
+ + This module contains type definitions for both the index file and the + processed data types. All processed types start with `Indexed` to + differentiate them. + +*******************************************************************************/ + +module dub.index.data; + +import dub.dependency; +import dub.internal.configy.attributes; +import dub.internal.vibecompat.data.json; +import dub.internal.vibecompat.inet.path; +import dub.recipe.packagerecipe; + +/** + * Top level configuration for package index + * + * The source of truth for all packages registered with the Dub registry. + * Each entry is a package description that gets processed by `dub index` + * subcommands. + */ +public struct PackageList { + public @Key(`name`) PackageEntry[] packages; +} + +/** + * A user-supplied description of a package, to be processed by `dub index`. + */ +public struct PackageEntry { + /// Short-hand name of the package + public MainPackageName name; + /// The location of the package - e.g. Github or GitLab + public PackageSource source; + /// The list of version ranges to be indexed - if not provided, everything + public VersionRange[] included = [ VersionRange.Any ]; + /// The list of version ranges to be excluded - if not provided, nothing + public @Optional VersionRange[] excluded; +} + +/** + * A struct to do validation on main package name + */ +public struct MainPackageName { + public string value; + alias value this; + + public static MainPackageName fromConfig (scope ConfigParser parser) { + import std.algorithm : all; + import std.ascii : isASCII; + import std.exception : enforce; + + if (scope sc = parser.node.asScalar) { + const str = sc.str; + enforce(str.length, "Empty package name"); + enforce(str.all!isASCII, "Package names need to be ASCII only"); + enforce(str[0] != ':', "Cannot register sub-package, use main package name instead"); + const name = PackageName(str); + enforce(!name.sub.length, "Sub-packages are automatically registered, register main 
package name only"); + string mainName = name.main.toString(); // Work around dumb DIP1000 error + return MainPackageName(mainName); + } + throw new Exception("Expected node to be a scalar"); + } +} + +/** + * The source from which an indexed package is fetched + * + * Packages can come from different sources. The most common ones + * are Github and GitLab, and other providers may be added. + */ +public struct PackageSource { + /// Hosting provider, restricted to the values listed here + public Only!([`github`, `gitlab`, `bitbucket`]) kind; + public string owner; + public string project; + + /// JSON serialization, writing `kind` as its plain string value + public Json toJson () const @safe { + Json res = Json.emptyObject; + res["kind"] = this.kind.value; + res["owner"] = this.owner; + res["project"] = this.project; + return res; + } + + // Required for `toJson` not to be ignored + static PackageSource fromJson (Json src) @safe { assert(0); } +} + +/** + * A processed package description + * + * This describes a single package and is programmatically generated + * by `dub index-build`. + */ +public struct IndexedPackage (uint vers) { + /// Version of this index + public @Name(`version`) version_ = vers; + /// Copied verbatim from the index + public string name; + /// Description of the package + public string description; + /// Source of the package + public PackageSource source; + /// All known versions, sorted from most recent + public IndexedPackageVersion[] versions; + /// The cache information for the repository + /// + /// This is used to avoid needless refresh + public @Optional CacheInfo cache; +} + +/** + * A single indexed version of a package, with its sub-packages + */ +public struct IndexedPackageVersion { + /// The version this represents + public @Name(`version`) Version version_; + + /** + * Description of all sub-packages. + * + * The first entry is always the main package. + */ + public IndexedSubpackage[] subs; + + /** + * Git commit SHA-1 corresponding to this version + * + * This may be empty, and is only used for caching so far. 
+ */ + public @Optional string commit; +} + +public struct IndexedSubpackage { + /// Name of this sub-package + public string name; + /** + * A stripped down copy of the package's configuration. + * + * Currently only used for the `dependencies`. + */ + public ConfigurationInfo[] configurations; + + /** + * Cache information for this version + * + * This is used to avoid needless refresh. The cache instance will + * be empty for subpackages that do not have their own package file, + * such as inline subpackages. Subpackages being specified by `path`, + * and the main package, will have this filled. + */ + public @Optional CacheInfo cache; + + /// If this is a path-based subpackage, the path at which this subpackage is + public @Optional InetPath path; + + /// JSON serialization for configurations + public Json toJson () const @safe { + import std.algorithm; + import std.range; + + Json res = Json.emptyObject; + res["name"] = this.name; + if (!this.path.empty) + res["path"] = this.path.toString(); + if (this.cache !is CacheInfo.init) { + res["cache"] = Json.emptyObject; + if (this.cache.etag.length) + res["cache"]["etag"] = this.cache.etag; + if (this.cache.last_modified.length) + res["cache"]["last_modified"] = this.cache.last_modified; + } + if (!this.configurations.length) return res; + + Json[] cf = iota(this.configurations.length).map!(_ => Json.emptyObject).array; + foreach (idx, ref conf; this.configurations) { + cf[idx]["name"] = conf.name; + if (conf.platforms.length) + cf[idx]["platforms"] = serializeToJson(conf.platforms); + if (conf.dependencies.data.length) { + cf[idx]["dependencies"] = Json.emptyObject; + foreach (key, value; conf.dependencies.data) + cf[idx]["dependencies"][key] = serializeToJson(value.dependency); + } + } + res["configurations"] = cf; + return res; + } + + // Required for `toJson` not to be ignored + static IndexedSubpackage fromJson (Json src) @safe { assert(0); } +} + +/** + * Utility struct used for caching + * + * Note that this 
can be used for files across versions, as if the recipe file + * does not change across versions, we will get a hit and can further reduce + * the number of requests we emit. + * + * Note that the ETag differs between unauthenticated requests and authenticated + * requests. TODO: Do they also differ depending on the user ? + * Finally, unauthenticated conditional requests still count towards rate + * limiting (for fully cached packages, we only issue 1 request). + * + * See_Also: + * https://docs.github.com/en/rest/using-the-rest-api/best-practices-for-using-the-rest-api?apiVersion=2022-11-28#use-conditional-requests-if-appropriate + */ +public struct CacheInfo { + /// ETag returned by the request (may be null) + public @Optional string etag; + + /// Last modified returned by the request (may be null) + public @Optional string last_modified; +} diff --git a/source/dub/index/github.d b/source/dub/index/github.d new file mode 100644 index 0000000000..9e2cd78ab9 --- /dev/null +++ b/source/dub/index/github.d @@ -0,0 +1,489 @@ +/******************************************************************************* + + Types and functions to interface with the Github API to generate an index + entry + + See_Also: + https://docs.github.com/en/rest/using-the-rest-api/best-practices-for-using-the-rest-api + +*******************************************************************************/ + +module dub.index.github; + +import dub.dependency; +import dub.index.client; +import dub.index.data; +import dub.index.utils; +import dub.internal.utils; +import dub.internal.logging; +import dub.internal.vibecompat.data.json; +import dub.internal.vibecompat.inet.path; +import dub.internal.vibecompat.inet.url; +import dub.internal.vibecompat.inet.urlencode; +import dub.package_ : packageInfoFiles; +import dub.recipe.io; +import dub.recipe.packagerecipe; + +import std.algorithm; +import std.exception; +import std.format; +import std.range; +import std.typecons; + +/** + * Using the Github API, 
generate a package description matching this package + * + * We try to cache things as much as possible to reduce the number of requests + * counting towards our rate limit. There are a few ways we do this: + * - As the tags are ordered from most to least recent, if getting the first + * page of tags is a cache hit, then we don't need to do anything + * - If the tag list has changed, it's possible (likely even) that most tags + * are already known to us. If we have an existing cache, we compare to that + * as much as possible, using the `commit` for versions, and the `cache` + * for versions and subpackages; + * - In many instances, the recipe does not change across tags, only the code. + * To improve caching, when there is no existing index, we use the cache + * (etag) of the most recent version when querying the recipe file so that + * we may get a hit. + * + * There are a couple of pitfalls that hinder caching: + * - We cannot cache what recipe file is being used by the package, as things + * would then break if a package is currently using `dub.json` but was + * previously using `dub.sdl` (or the other way around). Hence we always need + * to do a request (and only one) to the contents endpoint. + * - subpackages may be inline, but are likely to be using a path, hence for + * each package with path subpackages, we will need to query them even if + * there is a cache hit on the main recipe - as a version could have a + * change to one of the subpackages but not the main package. + * + * We use a similar caching strategy for subpackages (closest version). + * + * Params: + * client = The repository client, used to make requests to the API + * pkg = The package entry as can be found in the index file + * existing = The existing entry for this package in the built index. + * This is used to avoid making too many requests to Github. + * If `null`, the package will be processed as if it was new. 
+ * + * Returns: + * A completed / filled `IndexedPackage!0` describing the cached index entry + * for this package. + * + * Throws: + * If an error happened, e.g. the package is dead, the rate limit was reached, + * etc... + */ +public IndexedPackage!0 updateDescription (scope RepositoryClient client, + in PackageEntry pkg, Nullable!(IndexedPackage!0) existing) { + + // In this function we refer to two different sources for cache: + // 1) the existing version, which is what we currently have in the cache + // 2) the previous version, which is the closest version, if any + + // Category: Tags ? + // Popularity: Forks, stars; + // Need last commit to establish which are the most active; + + const hasExisting = !existing.isNull(); + auto tags = client.getTags(hasExisting ? existing.get().cache : CacheInfo.init); + if (tags.notModified) { + const cache = existing.get().cache; // Frontend bug if inline below + logInfo("[%s] Package was not modified (%s, %s)", pkg.name, cache.etag, cache.last_modified); + return existing.get(); + } + if (hasExisting) + logInfo("[%s] Found %s tags (already cached: %s entries)", pkg.name, + tags.result.length, existing.get().versions.length); + else + logInfo("[%s] Found %s tags (no cached entry exists)", pkg.name, + tags.result.length); + + typeof(return) result; + result.name = pkg.name; + result.source = pkg.source; + result.cache = tags.cache; + + foreach (tidx, tag; tags.result) { + logInfo("[%s] Processing tag %s (%s/%s)", pkg.name, tag.name, tidx, tags.result.length); + if (!isTagIncluded(pkg, tag.name)) { + logInfo("[%s] Skipping tag %s", pkg.name, tag.name); + continue; + } + + // Then make sure there is not an already cached version of this tag + // If a repository has new tags, we do not want to reprocess old tags + // if they haven't changed. + IndexedPackageVersion existingVersion = hasExisting ? 
existing.get().versions + // There should always be at least one subs (the main), but better + // safe than sorry in case someone messes with those files. + // Note: Compare tags by string to avoid matching tags with metadata + // with release ones. + .filter!(ipv => ipv.version_.toString() == tag.name[1 .. $] && ipv.subs.length) + // Default value in case this tag is not in `existing` + .chain(only(IndexedPackageVersion.init)).front + : IndexedPackageVersion.init; + + try + result.versions ~= handleTag(client, pkg.name, tag, + existingVersion, result.versions, result.description); + catch (Exception exc) + logError("[%s] Could not process tag '%s': %s", pkg.name, tag.name, exc.message()); + } + // Fall back to the repository description if no version of the recipe + // file contains one + if (!result.description.length) + result.description = client.getDescription(CacheInfo.init); + + return result; +} + +/** + * Handle a single tag + * + * This function is called once per tag to process. Any error will result + * in the tag (and only this tag) being skipped. + * + * Params: + * client = The repository client used to look up recipe files + * pkgname = Name of the package, used in log messages + * tag = The tag (name and commit) to process + * existing = The already indexed entry for this tag, or `.init` if + * there is none + * others = The versions already processed for this package; the last + * one is used as a cache source when `existing` has none + * description = Filled with the recipe's description if still empty + */ +private IndexedPackageVersion handleTag ( + scope RepositoryClient client, string pkgname, TagDescription tag, + IndexedPackageVersion existing, IndexedPackageVersion[] others, + ref string description) { + + CacheInfo main_cache = existing.subs.length ? + existing.subs[0].cache : CacheInfo.init; + + // Here we can simply check the commit - This is a more robust method + // than relying on ETags as we would need to check ETags for all package + // files (e.g. see unit-threaded or Vibe.d usage of subpackages). + // Only trust the match when `existing` is a real entry: an empty + // `tag.commit` would otherwise compare equal to the empty commit of + // `IndexedPackageVersion.init` and an entry without any `subs` would + // be returned as if it was a valid version. + if (existing.subs.length && tag.commit == existing.commit) { + logInfo("[%s] Tag %s was not modified: %s", pkgname, tag.name, + existing.commit); + return existing; + } + + // If there was no match, use the CacheInfo of the closest version + // If the recipe file hasn't changed between version, we'll get a match + // We need to keep track where we got the cache from though. 
+ const bool hasExistingCache = main_cache !is CacheInfo.init; + InetPath recipePath = hasExistingCache ? existing.subs[0].path + : InetPath(`/`); + if (!hasExistingCache && others.length) { + main_cache = others[$ - 1].subs[0].cache; + recipePath = others[$ - 1].subs[0].path; + } + + auto recipeResult = client.findRecipe(recipePath, tag.name, main_cache); + if (recipeResult.notModified) { + if (hasExistingCache) { + // We have an existing cache but the commit didn't match. + // Perhaps the version was re-tagged on a different commit with + // the same recipe. + logWarn("[%s][%s] Cache match without commit match - The version was re-tagged?", + pkgname, tag.name); + logWarn("[%s][%s] Throwing away cache and processing anew...", pkgname, tag.name); + return handleTag(client, pkgname, tag, IndexedPackageVersion.init, others, description); + } + // We don't have an *existing* cache, but we have a *previous* one + assert(others.length, "Cache matched a previous version without previous version"); + return handleTagFromPrevious(client, pkgname, tag, others[$ - 1], recipePath); + } + + logInfo("[%s] Found new tag with new package: %s", pkgname, tag.name); + auto recipe = recipeResult.result; + enforce(recipe.subPackages.length < 200, + "Package has too many subpackages: %s".format(recipe.subPackages.length)); + if (!description.length && recipe.description.length) + description = recipe.description; + + auto subs = new IndexedSubpackage[1 + recipe.subPackages.length]; + subs[0] = makeSubR(null, recipeResult.cache, recipe, recipePath); + foreach (spidx, ref subpkg; recipe.subPackages) { + if (subpkg.path.length) { + // TODO: Try to find previous subpackage instead + // Might give us a path or a cache + auto path = InetPath(subpkg.path); + auto res = client.findRecipe(path, tag.name, CacheInfo.init); + enforce(res.result.name.length, "No response for subpackage"); + subs[1 + spidx] = makeSubR(res.result.name, res.cache, res.result, path); + } else { + subs[1 + spidx] = 
makeSubR(subpkg.recipe.name, CacheInfo.init, subpkg.recipe, InetPath.init); + } + } + return IndexedPackageVersion(Version(tag.name[1 .. $]), subs, tag.commit); +} + +/** + * Fetches data related to a tag taking into account a previous match + * + * In many instances, the recipe file does not change between versions. + * We take advantage of this fact by using the previous version's ETag + * when requesting a recipe to avoid needlessly fetching package recipe + * (as well as subpackage's) and reduce our API use. + */ +private IndexedPackageVersion handleTagFromPrevious (scope RepositoryClient client, + string pkgname, TagDescription tag, IndexedPackageVersion previous, + InetPath recipePath) { + logInfo("[%s] Found new tag '%s' with package matching '%s'", + pkgname, tag.name, previous.version_); + // A previous version matches - however if there are subpackages + // files we also need to check them as they might have changed. + auto subs = new IndexedSubpackage[previous.subs.length]; + subs[0] = previous.subs[0]; + // The path might be different though, but the client handles this + subs[0].path = recipePath; + foreach (spidx, ref subpkg; previous.subs[1 .. 
$]) { + if (!subpkg.path.empty) { + auto subPath = subpkg.path; + auto res = client.findRecipe(subPath, tag.name, subpkg.cache); + if (res.notModified) { + logInfo("[%s][%s] Subpackage '%s' matches previous version '%s'", + pkgname, tag.name, subpkg.name, previous.version_); + subs[1 + spidx] = subpkg; + } else { + logInfo("[%s][%s] Subpackage '%s' (%s) differs from previous version '%s' (%s)", + pkgname, tag.name, subpkg.name, subPath, previous.version_, subpkg.path); + enforce(res.result.name.length, "No response for subpackage"); + subs[1 + spidx] = makeSubR(res.result.name, res.cache, res.result, subPath); + } + } else { + logInfo("[%s][%s] Inline subpackage '%s' matches previous version '%s'", + pkgname, tag.name, subpkg.name, previous.version_); + subs[1 + spidx] = subpkg; + } + } + return IndexedPackageVersion(Version(tag.name[1 .. $]), subs, tag.commit); +} + +/** + * Build an `IndexedSubpackage` from a parsed recipe + * + * The first configuration is an unnamed one holding the recipe's top-level + * build settings, followed by the recipe's own configurations. + */ +private IndexedSubpackage makeSubR (string name, in CacheInfo cache, + ref PackageRecipe recipe, InetPath path) { + return IndexedSubpackage(name, + [ ConfigurationInfo(null, null, recipe.buildSettings) ] ~ recipe.configurations, + cache, path); +} + +/** + * Base client to interact with Github API + * + * This client handles authentication, rate limiting, and caching. + * As we had ~2500 packages at the time of writing (2025-04) and the API is + * limited to 5000 requests / hour, with each package taking multiple requests, + * we needed a smart way to do regular refresh of packages, which this client + * handles by saving and comparing ETags for repositories. + */ +public class GithubClient : APIClient { + /** + * Construct an instance of a Github client + * + * Params: + * token = Optional token to use to authenticate requests. + * Its use is highly recommended as otherwise the rate limit is + * quite low (60 / hour instead of 5000 / hour). 
+ * url = URL of the Github API to use, defaults to api.github.com + */ + public this (string token = null, string url = `https://api.github.com`) { + super(URL(url), token); + } + + /** + * Scoped client for repository-specific interaction + * + * This should be used via: + * ``` + * scope gh = new GithubClient(); + * scope repo = gh.new Repository("dlang", "dub"); + * // Now get information on repository at https://github.com/dlang/dub + * ``` + */ + public class Repository : RepositoryClient { + /// Path to the repository + private InetPath path; + /// Detailed tag data cache (or empty if not queried) + private Json[string] allTags; + + /** + * Construct an instance of this object + */ + public this (string owner, string project) @safe { + this.path = InetPath("repos/") ~ owner ~ project; + } + + /** + * Get the project description as filled in Github + * + * This can be used as a fallback if the recipe file does not + * contain a description. + */ + public override string getDescription (CacheInfo cache = CacheInfo.init) { + return this.outer.get(this.path, cache).result["description"].opt!string; + } + + /** + * Get all the tags for this repository. + * + * Returns: + * A `null` `Nullable` if there was a cache hit, the list of tags + * otherwise. 
+ * + * See_Also: + * https://docs.github.com/en/rest/repos/repos?apiVersion=2022-11-28#list-repository-tags + */ + public override RequestResult!(TagDescription[]) getTags (CacheInfo cache) { + static TagDescription[] jsonToTag (Json[] data) { + return data.map!(t => + TagDescription(t["name"].opt!string, t["commit"]["sha"].opt!string)) + .array; + } + + TagDescription[] getTagsInternal (string url) { + if (!url.length) return null; + auto res = this.outer.get(URL(url)); + auto ret = jsonToTag(res.result.parseJson().opt!(Json[])); + if (res.next.length) + return ret ~ getTagsInternal(res.next); + return ret; + } + + auto res = this.outer.get(this.path ~ "tags", cache, "per_page=100"); + if (res.notModified) return res.convert!(TagDescription[])(null); + return res.convert(jsonToTag(res.result.opt!(Json[])) ~ getTagsInternal(res.next)); + } + + /** + * Get data on a specific tag + * + * See_Also: + * https://docs.github.com/en/rest/git/tags?apiVersion=2022-11-28#get-a-tag + */ + public Json getTag (string name) { + if (scope resp = name in this.allTags) + return *resp; + auto res = this.outer.get(this.path ~ "tags" ~ name, CacheInfo.init); + this.allTags[name] = res.result; + return res.result; + } + + /** + * Get a Json object describing the recipe file for this reference + * + * We do not know ahead of time what type of recipe file a project uses, + * as it could be `dub.json`, `dub.sdl`, or even `package.json`. + * This endpoint will iterate through the list and find the file that + * dub would have picked, and returns the JSON API object describing it. + * Of interest in that object are the property `download_url` (to get + * the raw content), and `name` / `path`. + * + * Params: + * reference = The reference at which the file is looked up, e.g. + * `master` or a tag such as `v1.0.0`. + * cache = Cache information about this recipe file, if any. + * where = Where to look for a recipe file. Can be used to look up + * subpackages. 
+ * + * Returns: + * Either a filled `RequestResult!Json` or one in the empty state if + * no recipe file was found. + * + * See_Also: + * https://docs.github.com/en/rest/repos/contents + */ + public RequestResult!Json findRecipeSummary ( + string reference, CacheInfo cache, InetPath where) { + + // We are either looking up the root directory or a subdirectory + // (for subpackages), in both cases we get the directory content. + if (where.absolute) { + auto segments = where.bySegment(); + segments.popFront(); + where = InetPath(segments); + assert(!where.absolute); + } + + scope res = this.outer.get((this.path ~ "contents/" ~ where).normalized(), + cache, "ref=" ~ urlEncode(reference)); + if (res.notModified) return res.convert(Json.emptyObject); + auto dir = res.result.opt!(Json[]); + foreach (info; packageInfoFiles) { + auto resrng = dir.find!(entry => entry["name"].opt!string == info.filename); + if (!resrng.empty) + return res.convert(resrng.front); + } + return res.convert(Json.emptyObject); + } + + /** + * Get the deserialized `PackageRecipe` + * + * This finds the package recipe in the repository (by calling + * `findRecipeSummary`), then fetches and deserialize it. + * + * Params: + * path = This can be either where to look for the package file, + * or if the path is to a package file itself (ends with one + * of the recognized package name), where to look it up. + * If the package file is not found, this will fall back + * to calling `findRecipeSummary`. + * If the recipe path doesn't match, this value will be + * changed to match the actual path. + * reference = The reference at which the file is looked up, e.g. + * `master` or a tag such as `v1.0.0`. + * cache = Cache information about this recipe file, if any. 
+ */ + public override RequestResult!PackageRecipe findRecipe ( + ref InetPath path, string reference, CacheInfo cache) { + auto localPath = path; + if (!packageInfoFiles.filter!(pif => path.head == pif.filename).empty) { + try + return this.getRecipe(path, reference, cache); + catch (Exception exc) + localPath = path.hasParentPath() ? path.parentPath() : InetPath(`/`); + } + + // We don't cache `findRecipeSummary`, and if we reached this branch + // our caching information is useless anyway as we didn't have the + // right path. + auto res = this.findRecipeSummary(reference, CacheInfo.init, localPath); + enforce(res.result != Json.emptyObject, + "Could not find recipe file at '%s' in repository".format(path)); + path = InetPath(res.result["path"].opt!string); + return this.getRecipe(path, reference, CacheInfo.init); + } + + /** + * Get a recipe at a specified `path` + * + * Fetches and deserializes the package recipe at `path`. + * + * Params: + * path = Path to the recipe to fetch + * reference = Git reference at which to fetch + * cache = Caching information + * + * Throws: + * If no recipe exists, or if an underlying error happens. + */ + public RequestResult!PackageRecipe getRecipe (InetPath path, string reference, + CacheInfo cache) { + import std.base64; + import std.string : lineSplitter; + import std.utf; + + auto res = this.outer.get((this.path ~ "contents/" ~ path), + cache, "ref=" ~ urlEncode(reference)); + if (res.notModified) + return res.convert(PackageRecipe.init); + // Only support recipe files up to 1 Mb. Fair ? + const size = res.result["size"].opt!uint; + enforce(size < 1_000_000, + "Recipe file size is over 1 Megabyte: %s bytes".format(size)); + ubyte[] buffer = new ubyte[size]; + auto slice = buffer; + foreach (line; res.result["content"].opt!string.lineSplitter) { + const decoded = Base64.decode(line, slice); + enforce(decoded.length <= slice.length, "Reading past end of buffer?"); + slice = slice[decoded.length .. 
$]; + } + const str = cast(string) buffer[0 .. $ - slice.length]; + validate(str); + return res.convert(parsePackageRecipe(str, path.toString())); + } + } +} diff --git a/source/dub/index/gitlab.d b/source/dub/index/gitlab.d new file mode 100644 index 0000000000..50463ef800 --- /dev/null +++ b/source/dub/index/gitlab.d @@ -0,0 +1,233 @@ +/******************************************************************************* + + Types and functions to interface with the GitLab API to generate an index + entry + + As a lot of code for the GitLab API can be abstracted away to use a similar + logic as the GitHub API, this only implements the clients. + + See_Also: + https://docs.gitlab.com/api/rest/ + +*******************************************************************************/ + +module dub.index.gitlab; + +import dub.index.client; +import dub.index.data; +import dub.internal.vibecompat.data.json; +import dub.internal.vibecompat.inet.path; +import dub.internal.vibecompat.inet.url; +import dub.internal.vibecompat.inet.urlencode; +import dub.package_; +import dub.recipe.io; +import dub.recipe.packagerecipe; + +import std.algorithm; +import std.array; +import std.exception; +import std.format; +import std.typecons; + +/** + * Base client to interact with GitLab API + */ +public class GitLabClient : APIClient { + /** + * Construct an instance of a GitLab client + * + * Params: + * token = Optional token to use to authenticate requests. 
+ * url = URL of the GitLab API to use, defaults to gitlab.com's + */ + public this (string token = null, string url = `https://gitlab.com/api/v4/`) { + // TODO: Token support + super(URL(url), token); + } + + /** + * Scoped client for project-specific interaction + * + * This should be used via: + * ``` + * scope client = new GitLabClient(); + * scope repo = client.new Project("dlang", "dub"); + * // Now get information on repository at https://gitlab.com/dlang/dub + * ``` + */ + public class Project : RepositoryClient { + /// Path to the project + private InetPath path; + + /** + * Construct an instance of this object + * + * The project is addressed as `projects/` followed by the URL-encoded + * `owner/project` path. + */ + public this (string owner, string project) @safe { + this.path = InetPath("projects") ~ urlEncode( + (InetPath(owner) ~ project).toString()); + } + + /** + * Get the project description as filled in GitLab + * + * This can be used as a fallback if the recipe file does not + * contain a description. + */ + public override string getDescription (CacheInfo cache = CacheInfo.init) { + return this.outer.get(this.path, cache).result["description"].opt!string; + } + + /** + * Get all the tags for this repository. + * + * Returns: + * A result flagged `notModified` (with no tags) if there was a cache hit, + * the list of tags otherwise. 
+ * + * Pages of 100 tags (the largest page size the API serves) are requested + * to keep the number of requests low, and following pages are fetched + * through the `next` link of each response. + * + * See_Also: + * https://docs.gitlab.com/api/tags/ + */ + public override RequestResult!(TagDescription[]) getTags (CacheInfo cache) { + static TagDescription[] jsonToTag (Json[] data) { + return data.map!(t => + TagDescription(t["name"].opt!string, t["commit"]["id"].opt!string)) + .array; + } + + TagDescription[] getTagsInternal (string url) { + if (!url.length) return null; + auto res = this.outer.get(URL(url)); + auto ret = jsonToTag(res.result.parseJson().opt!(Json[])); + if (res.next.length) + return ret ~ getTagsInternal(res.next); + return ret; + } + + // Same page size as the Github client uses for its tag listing + auto res = this.outer.get(this.path ~ "repository" ~ "tags", cache, "per_page=100"); + if (res.notModified) return res.convert!(TagDescription[])(null); + return res.convert(jsonToTag(res.result.opt!(Json[])) ~ getTagsInternal(res.next)); + } + + /** + * Get a Json object describing the recipe file for this reference + * + * We do not know ahead of time what type of recipe file a project uses, + * as it could be `dub.json`, `dub.sdl`, or even `package.json`. + * This endpoint will iterate through the list and find the file that + * dub would have picked, and returns the JSON API object describing it. + * Of interest in that object are the properties `name` and `path`; + * the content itself is fetched separately by `getRecipe`. + * + * Params: + * reference = The reference at which the file is looked up, e.g. + * `master` or a tag such as `v1.0.0`. + * cache = Cache information about this recipe file, if any. + * where = Where to look for a recipe file. Can be used to look up + * subpackages. + * + * Returns: + * Either a filled `RequestResult!Json` or one in the empty state if + * no recipe file was found. + * + * See_Also: + * https://docs.gitlab.com/api/repositories/#list-repository-tree + */ + public RequestResult!Json findRecipeSummary (string reference, CacheInfo cache, + InetPath where) { + + // We are either looking up the root directory or a subdirectory + // (for subpackages), in both cases we get the directory content. 
+ if (where.absolute) { + auto segments = where.bySegment(); + segments.popFront(); + where = InetPath(segments); + assert(!where.absolute); + } + + // The tree listing is paginated and only the first page is inspected + // below, so ask for the largest page (100 entries) to avoid missing a + // recipe file that sorts after the default page size. + scope res = this.outer.get( + (this.path ~ "repository" ~ "tree").normalized(), + cache, "ref=%s&path=%s&per_page=100".format(urlEncode(reference), + urlEncode(where.empty ? "/" : where.toString()))); + if (res.notModified) return res.convert(Json.emptyObject); + auto dir = res.result.opt!(Json[]); + // `packageInfoFiles` is walked in order, so the first recipe file name + // in that list which is present in the directory wins. + foreach (info; packageInfoFiles) { + auto resrng = dir.find!(entry => entry["name"].opt!string == info.filename); + if (!resrng.empty) + return res.convert(resrng.front); + } + return res.convert(Json.emptyObject); + } + + /** + * Get the deserialized `PackageRecipe` + * + * This finds the package recipe in the repository (by calling + * `findRecipeSummary`), then fetches and deserialize it. + * + * Params: + * path = This can be either where to look for the package file, + * or if the path is to a package file itself (ends with one + * of the recognized package name), where to look it up. + * If the package file is not found, this will fall back + * to calling `findRecipeSummary`. + * If the recipe path doesn't match, this value will be + * changed to match the actual path. + * reference = The reference at which the file is looked up, e.g. + * `master` or a tag such as `v1.0.0`. + * cache = Cache information about this recipe file, if any. + */ + public override RequestResult!PackageRecipe findRecipe (ref InetPath path, + string reference, CacheInfo cache) { + auto localPath = path; + if (!packageInfoFiles.filter!(pif => path.head == pif.filename).empty) { + try + return this.getRecipe(path, reference, cache); + catch (Exception exc) + localPath = path.hasParentPath() ? path.parentPath() : InetPath(`/`); + } + + // We don't cache `findRecipeSummary`, and if we reached this branch + // our caching information is useless anyway as we didn't have the + // right path. 
+ auto res = this.findRecipeSummary(reference, CacheInfo.init, localPath); + enforce(res.result != Json.emptyObject, + "Could not find recipe file at '%s' in repository".format(path)); + path = InetPath(res.result["path"].opt!string); + return this.getRecipe(path, reference, CacheInfo.init); + } + + /** + * Get a recipe at a specified `path` + * + * Fetches and deserializes the package recipe at `path`. + * + * Params: + * path = Path to the recipe to fetch + * reference = Git reference at which to fetch + * cache = Caching information + * + * Throws: + * If no recipe exists, or if an underlying error happens. + */ + public RequestResult!PackageRecipe getRecipe (InetPath path, string reference, + CacheInfo cache) { + import std.base64; + import std.string : lineSplitter; + import std.utf; + + const wh = urlEncode(path.toString()); + scope res = this.outer.get((this.path ~ "repository" ~ "files" ~ wh).normalized(), + cache, "ref=" ~ urlEncode(reference)); + if (res.notModified) + return res.convert(PackageRecipe.init); + // Only support recipe files up to 1 Mb. Fair ? 
+ const size = res.result["size"].opt!uint; + enforce(size < 1_000_000, + "Recipe file size is over 1 Megabyte: %s bytes".format(size)); + ubyte[] buffer = new ubyte[size]; + const str = cast(string) Base64.decode(res.result["content"].opt!string, buffer); + validate(str); + return res.convert(parsePackageRecipe(str, path.toString())); + } + } +} diff --git a/source/dub/index/utils.d b/source/dub/index/utils.d new file mode 100644 index 0000000000..85bbff1fcd --- /dev/null +++ b/source/dub/index/utils.d @@ -0,0 +1,113 @@ +/******************************************************************************* + + Utility functions for dealing with the index + +*******************************************************************************/ + +module dub.index.utils; + +import dub.dependency; +import dub.index.data; +import dub.internal.vibecompat.inet.path; + +import std.exception; +static import std.file; +import std.format; +import std.typecons; + +/** + * Loads a package description from the index. + * + * This attempts to load a package description from the index. + * If no such description exists, an `Exception` is thrown. + * + * Params: + * path = The base path of the index + * name = The name of the package to load a description for. + */ +public IndexedPackage!0 loadPackageDesc (in NativePath path, in PackageName name) { + import dub.internal.configy.easy; + + const file = getPackageDescriptionPath(path, name).toNativeString(); + enforce(std.file.exists(file), "No such package: %s".format(name)); + return parseConfigString!(typeof(return))(std.file.readText(file), file); +} + +/** + * Gets the path the package should be indexed at. + * + * The index use a simple system where the first two letters of the package, + * and the last two letters reversed are used. So for package `configy`, + * the path would be `$BASE/co/yg/configy`. For `dub`, `$BASE/du/bu/dub`. + * This scheme is used as it gives slightly better distribution. 
+ * + * Params: + * path = The base path of the index + * name = The name of the package to get the path for. + * + * Returns: + * The path at which the package description should be stored. + */ +public NativePath getPackageDescriptionPath (in NativePath path, in PackageName name) { + import std.range; + + const main = name.main.toString(); + if (main.length < 2) /* too short for a two-letter prefix: the name itself is used at every level */ + return (path ~ main ~ main ~ main); + + immutable char[2] end = [ main[$-1], main[$-2] ]; + return (path ~ main[0 .. 2] ~ end[] ~ main); +} + +/** + * From a package description, find the version that best matches the range + * + * Params: + * pkg = The package description to look at + * dep = The expected version range to match + * + * Returns: + * The highest version matching `dep`, or an empty `Nullable` if none does. + */ +public Nullable!(const(IndexedPackageVersion)) bestMatch ( + in IndexedPackage!0 pkg, in VersionRange dep) { + size_t idx = pkg.versions.length; /* `versions.length` doubles as the "no match yet" sentinel */ + foreach (eidx, ref vers; pkg.versions) { + // Is it a match ? + if (dep.matches(vers.version_)) { + if (idx < pkg.versions.length) { + // Is it a better (higher) match than the current best ? + if (pkg.versions[idx].version_ < vers.version_) + idx = eidx; + } else { + // We don't have a match yet: remember this first candidate + idx = eidx; + } + } + } + return idx < pkg.versions.length ? nullable(pkg.versions[idx]) : typeof(return).init; +} + +/** + * Checks whether a version should be processed or not + * + * Returns: + * Whether the package should be included. If any error happens, including + * if the tag does not start with the prefix `v`, `false` is returned. + */ +package bool isTagIncluded (in PackageEntry pkg, string name) nothrow { + import std.algorithm.searching : startsWith; + + if (!name.startsWith("v")) return false; + try { + auto vers = Version(name[1 .. 
$]); + foreach (excl; pkg.excluded) + if (excl.matches(vers)) + return false; + foreach (incl; pkg.included) + if (incl.matches(vers)) + return true; + return false; + } catch (Exception exc) + return false; +} diff --git a/source/dub/internal/configy/backend/json.d b/source/dub/internal/configy/backend/json.d new file mode 100644 index 0000000000..91715eec90 --- /dev/null +++ b/source/dub/internal/configy/backend/json.d @@ -0,0 +1,106 @@ +/******************************************************************************* + + A backend using `dub.internal.vibecompat.data.json` as parser. + + This is a more JSON-compliant parser than the YAML one (which is used for + parsing `dub.json` files) however it doesn't provide location information + that is as good as the YAML parser's, hence it is only used for internal + files that may have 'special' UTF-8 characters. + + See_Also: + https://github.com/dlang-community/D-YAML/issues/342 + +*******************************************************************************/ + +module dub.internal.configy.backend.json; + +import dub.internal.configy.backend.node; +import dub.internal.configy.utils; +import dub.internal.vibecompat.data.json; + +import std.algorithm : among; +import std.exception; +import std.format; + +/******************************************************************************* + + A wrapper around a JSON node + + This node is all types at once, however users should use one of the `asX` 
+ methods to get some useful value out of it. + +*******************************************************************************/ + +public class JSONNode : Node, Mapping, Sequence, Scalar { + /// Underlying data + private Json n; + /// File name reported by `location()` + private string file; + + public this (Json node, string file) @safe { + this.n = node; + this.file = file; + } + + public override Location location () const scope @safe nothrow { + // No column information + version (JsonLineNumbers) + return Location(this.file, this.n.line); + else + return Location(this.file, 0); + } + + public override Type type () const scope @safe nothrow { + final switch (this.n.type()) { + // Treat `null` and `undefined` as empty mapping + case Json.Type.null_: + case Json.Type.undefined: + case Json.Type.object: + return Node.Type.Mapping; + case Json.Type.array: + return Node.Type.Sequence; + case Json.Type.bool_: + case Json.Type.int_: + case Json.Type.bigInt: + case Json.Type.float_: + case Json.Type.string: + return Node.Type.Scalar; + } + } + + public override inout(Mapping) asMapping () inout scope return @safe { + return this.type() == Node.Type.Mapping ? this : null; + } + public override inout(Sequence) asSequence () inout scope return @safe { + return this.type() == Node.Type.Sequence ? this : null; + } + public override inout(Scalar) asScalar () inout scope return @safe { + // undefined and null can be used as scalar if one so desires + return this.n.type().among(Json.Type.object, Json.Type.array) ? 
null : this; + } + + public override string str () const scope return @safe { + return this.n.to!string; + } + public override size_t length () const scope @safe { + return this.n.length(); + } + public override int opApply (scope MapIterator dg) scope { + foreach (ref string idx, ref Json val; this.n) { + scope nIdx = new JSONNode(Json(idx), this.file); + version (JsonLineNumbers) nIdx.n.line = val.line; /* the key node takes its value's line number */ + scope Node nVal = new JSONNode(val, this.file); + if (auto res = dg(nIdx, nVal)) + return res; + } + return 0; + } + public override int opApply (scope SeqIterator dg) scope { + foreach (size_t idx, ref Json val; this.n) { + scope nVal = new JSONNode(val, this.file); + if (auto res = dg(idx, nVal)) + return res; + } + return 0; + } +} diff --git a/source/dub/internal/configy/easy.d b/source/dub/internal/configy/easy.d index 19aa18f512..c2b03f8ede 100644 --- a/source/dub/internal/configy/easy.d +++ b/source/dub/internal/configy/easy.d @@ -205,3 +205,18 @@ public ConfigT parseConfigString (ConfigT) auto root = parseString(data, args.config_path); return parseConfig!ConfigT(root, strict); } + +/// ditto +public ConfigT parseConfigStringJSON (ConfigT) + (string data, string path, StrictMode strict = StrictMode.Error) +{ + import std.exception; + import dub.internal.configy.backend.json; + import dub.internal.vibecompat.data.json; + + assert(path.length, "No path provided to parseConfigStringJSON"); + auto root = new JSONNode(parseJsonString(data, path), path).asMapping(); + enforce(root !is null, "Parsing '" ~ path ~ "' didn't yield an object"); + return parseConfig!ConfigT(root, strict); + +} diff --git a/source/dub/internal/git.d b/source/dub/internal/git.d index e8913a2b1d..835770f32b 100644 --- a/source/dub/internal/git.d +++ b/source/dub/internal/git.d @@ -141,6 +141,11 @@ unittest { Returns: Whether the cloning succeeded. 
*/ +bool cloneRepository(string remote, string reference, NativePath destination) { + return cloneRepository(remote, reference, destination.toNativeString()); +} + +/// Ditto bool cloneRepository(string remote, string reference, string destination) { import std.process : Pid, spawnProcess, wait; @@ -168,3 +173,20 @@ bool cloneRepository(string remote, string reference, string destination) return true; } + +/** + * Fetch `remote` in an already checked out repository, then check out `reference` (detached). Returns whether both git commands succeeded. + */ +public bool updateRepository (NativePath path, string reference, string remote = "origin") { + import std.process : spawnProcess, wait; + + const pathStr = path.toNativeString(); + string[] args = [ "git", "-C", pathStr, "fetch", remote ]; + if (getLogLevel > LogLevel.diagnostic) args ~= "-q"; + if (wait(spawnProcess(args)) != 0) + return false; + + args = [ "git", "-C", pathStr, "checkout", "--detach" ]; + if (getLogLevel > LogLevel.diagnostic) args ~= "-q"; + return (wait(spawnProcess(args ~ reference)) == 0); +} diff --git a/source/dub/packagesuppliers/index.d b/source/dub/packagesuppliers/index.d new file mode 100644 index 0000000000..791ac485a2 --- /dev/null +++ b/source/dub/packagesuppliers/index.d @@ -0,0 +1,210 @@ +/******************************************************************************* + + Index-based registry + + This implementation supersedes the legacy registry and queries Github + directly for a well-known index that is cloned locally and always + accessible. This ensures that our only dependency for fetching packages + is Github, reducing the risk of downtime. This supplier also soft-fails when + the network is not available, ensuring that users even offline can perform + search if they have a checked out (but possibly outdated) index. + + As most use cases already know the package they are looking for, this supplier + does not maintain a search index, and `dub search` will simply open all package + indices on the filesystem. 
+ +*******************************************************************************/ + +module dub.packagesuppliers.index; + +import dub.dependency; +import dub.index.data; +import dub.index.utils; +import dub.internal.configy.easy; +import dub.internal.logging; +import dub.internal.utils; +import dub.internal.vibecompat.inet.path; +import dub.internal.vibecompat.inet.url; +import dub.packagesuppliers.packagesupplier; +import dub.recipe.packagerecipe; + +import std.algorithm; +import std.array : array; +import std.exception; +static import std.file; +import std.format; +import std.range : retro; +import std.string; +import std.typecons; + +/// Ditto +public class IndexPackageSupplier : PackageSupplier { + /// The origin of the index - only used for initial checkout + protected URL url; + /// The path at which the index resides + protected NativePath path; + /// Whether a git clone or update has been attempted during this program's + /// lifetime (it is attempted at most once). + protected bool initialized; + + /*************************************************************************** + + Instantiate a new `IndexPackageSupplier` + + Params: + url = The origin of the index, used for the initial clone + path = The root path where the index is (to be) checked out + ***************************************************************************/ + + public this (URL url, NativePath path) @safe pure nothrow { + this.url = url; + this.path = path; + this.path.endsWithSlash = true; + } + + /// Human-readable description naming the index URL and its local path + public override @property string description () { + return "index-based registry (" ~ this.url.toString() ~ ` => ` ~ + this.path.toNativeString() ~ ")"; + } + + /// Lists every version recorded in the index for package `name` + public override Version[] getVersions (in PackageName name) { + this.ensureInitialized(); + const pkg = loadPackageDesc(this.path, name); + return pkg.versions.map!(vers => Version(vers.version_.toString())).array; + } + + /** + * Fetch a package directly from the provider. 
+ * + * See_Also: + * - https://docs.github.com/en/rest/repos/contents?apiVersion=2022-11-28#download-a-repository-archive-zip + * - https://docs.gitlab.com/api/repositories/ + * - https://support.atlassian.com/bitbucket-cloud/kb/how-to-download-repositories-using-the-api/ + */ + public override ubyte[] fetchPackage (in PackageName name, + in VersionRange dep, bool pre_release) { + import dub.internal.git; + import dub.internal.vibecompat.inet.urlencode; + + this.ensureInitialized(); + const pkgdesc = loadPackageDesc(this.path, name); + auto vers = pkgdesc.bestMatch(dep); + enforce(!vers.isNull(), "No package found matching dep"); + switch (pkgdesc.source.kind) { + case "github": + const url = "https://api.github.com/repos/%s/%s/zipball/%s".format( + pkgdesc.source.owner, pkgdesc.source.project, vers.get()); /* NOTE(review): relies on how IndexedPackageVersion formats with %s - confirm it yields a usable git ref */ + return retryDownload(URL(url)); + case "gitlab": + const url = "https://gitlab.com/api/v4/projects/%s/repository/archive.zip?sha=%s".format( + urlEncode((InetPath(pkgdesc.source.owner) ~ pkgdesc.source.project).toString()), + vers.get()); + return retryDownload(URL(url)); + case "bitbucket": + const url = "https://bitbucket.org/%s/%s/get/%s.zip".format( + pkgdesc.source.owner, pkgdesc.source.project, vers.get()); + return retryDownload(URL(url)); + default: + throw new Exception("Unhandled repository kind: " ~ pkgdesc.source.kind); + } + } + + /// Returns a minimal recipe holding only the best-matching `version` for `dep` + public override Json fetchPackageRecipe(in PackageName name, + in VersionRange dep, bool pre_release) { + this.ensureInitialized(); + const pkgdesc = loadPackageDesc(this.path, name); + const vers = pkgdesc.bestMatch(dep); + enforce(!vers.isNull(), + "Cannot fetch version '%s' of package '%s': No such version exists" + .format(dep, name)); + // Note: Only 'version' is used from the return of 'fetchPackageRecipe' + Json res; + res["version"] = vers.get().version_.toString(); + return res; + } + + /** + * Search all packages matching the query + * + * Note that it is an expensive operation as it iterates over the 
whole + * index locally. This is currently only called from `dub search` and + * is unlikely to be called from any long-running process so we're + * not concerned about memory usage / speed (a couple seconds is fine). + */ + public override SearchResult[] searchPackages (string query) { + import std.path; + + static SearchResult addPackage (in IndexedPackage!0 idx) { + auto maxVers = idx.bestMatch(VersionRange.Any); + return SearchResult(idx.name, idx.description, + !maxVers.isNull() ? maxVers.get().version_.toString() : null); + } + + typeof(return) results; + this.ensureInitialized(); + const origSound = query.soundexer(); + foreach (directory; std.file.dirEntries(this.path.toNativeString(), std.file.SpanMode.shallow)) { + // Exclude any README, the .git directory / any dot files + if (!directory.isDir()) continue; + if (directory.baseName.length != 2) continue; + foreach (entry; std.file.dirEntries(directory.name, std.file.SpanMode.breadth)) { + if (!entry.isFile()) continue; + try { + // D-YAML can't process some of the UTF-8 we might find in + // descriptions, so we use a pure JSON backend with no line + // information. 
+ // See https://github.com/dlang-community/D-YAML/issues/342 + const desc = parseConfigStringJSON!(IndexedPackage!0)( + std.file.readText(entry.name), entry.name); + if (!desc.versions.length) continue; + // Some heuristics: substring match on name / description, else a `soundexer` match on the name + if (desc.name.canFind(query) || desc.description.canFind(query)) + results ~= addPackage(desc); + else if (desc.name.soundexer == origSound) + results ~= addPackage(desc); + } catch (ConfigException exc) { + logWarn("[%s] Internal error while reading package index: %S", entry.name, exc); /* NOTE(review): `%S` looks deliberate (ConfigException-specific formatting) - confirm */ + continue; + } + catch (Exception exc) { + logWarn("[%s] Internal error while reading package index: %s", entry.name, exc); + continue; + } + } + } + return results; + } + + /** + * Called by every method to ensure the index is available and up to date + * + * This method will hard fail if no index is available and the index cannot + * be cloned, and soft-fail when the index cannot be updated. + * It ensures that the index is updated at most once per program invocation. + * + * Returns: + * Whether this was the first call to `ensureInitialized`. + * + * Throws: + * If cloning the index failed. 
+ */ + public bool ensureInitialized () { + import dub.internal.git; + + if (this.initialized) return false; + scope (exit) this.initialized = true; /* also set when cloning throws, so no retry happens within this run */ + + if (!std.file.exists(this.path.toNativeString())) + enforce( + cloneRepository(this.url.toString(), "master", this.path), + "Cloning the repository failed - ensure you have a working internet " ~ + "connection or use `--skip-registry`"); + else { + updateRepository(this.path, "origin/HEAD"); /* result ignored: a failed update is the documented soft failure */ + } + return true; + } +} diff --git a/source/dub/packagesuppliers/package.d b/source/dub/packagesuppliers/package.d index 5bb8057959..d2f6cac56a 100644 --- a/source/dub/packagesuppliers/package.d +++ b/source/dub/packagesuppliers/package.d @@ -5,6 +5,7 @@ module dub.packagesuppliers; */ public import dub.packagesuppliers.fallback; public import dub.packagesuppliers.filesystem; +public import dub.packagesuppliers.index; public import dub.packagesuppliers.packagesupplier; public import dub.packagesuppliers.maven; public import dub.packagesuppliers.registry; diff --git a/source/dub/packagesuppliers/registry.d b/source/dub/packagesuppliers/registry.d index 43fe508b62..d2a524c3a5 100644 --- a/source/dub/packagesuppliers/registry.d +++ b/source/dub/packagesuppliers/registry.d @@ -135,4 +135,33 @@ class RegistryPackageSupplier : PackageSupplier { .map!(j => SearchResult(j["name"].opt!string, j["description"].opt!string, j["version"].opt!string)) .array; } + + /** + * This gets a dump of all the packages + * + * As it is pretty expensive (fetches 140M of JSON as of 2025-04), avoid + * calling this. Only used for internal commands. 
+ */ + package(dub) Json getPackageDump (bool force = false) { + static import std.file; + import dub.internal.logging; + + const cachePath = `registry-cache.json`; /* relative path, so it is looked up in the current working directory */ + auto url = this.m_registryUrl ~ InetPath("api/packages/dump"); + string data; + if (!force && std.file.exists(cachePath)) { /* `force` bypasses the local cache file */ + logInfo("Using `registry-cache.json` to avoid overloading the registry"); + data = std.file.readText(cachePath); + } + else { + if (!force) { + logWarn("Quering the registry for a list of packages - this is expensive, prefer caching"); + logWarn("Use caching with `curl https://code.dlang.org/api/packages/dump > %s`", cachePath); + } else { + logInfo("Querying the registry for a list of packages..."); + } + data = cast(string)retryDownload(url); /* note: the download is not written back to `cachePath` */ + } + return data.parseJson(); + } }