Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 1 addition & 22 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -100,25 +100,4 @@ jobs:
# htcluster exec -u root -s worker -- hdfs dfs -get hdfs:///tmp/install_python.sh /home/testuser && \
# htcluster exec -u root -s worker -- chmod +x /home/testuser/install_python.sh && \
# htcluster exec -u root -s worker -- /home/testuser/install_python.sh && \
# htcluster exec -s edge -- /home/testuser/cluster-pack/tests/integration/hadoop_hdfs_tests.sh"

conda:
runs-on: ubuntu-latest

strategy:
matrix:
python-version: [3.9]

steps:
- uses: actions/checkout@v2

- name: Run tests with conda
run: |
conda update -y conda
conda create -n venv -y python=${{ matrix.python-version }}
# https://github.com/conda/conda/issues/7980
eval "$(conda shell.bash hook)"
conda activate venv
pip install .
pip install -r tests-requirements.txt
pytest -m conda -s tests --log-cli-level=INFO
# htcluster exec -s edge -- /home/testuser/cluster-pack/tests/integration/hadoop_hdfs_tests.sh"
1 change: 0 additions & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,4 @@ include *.md
include LICENSE
include MANIFEST.in
include requirements.txt
include versioneer.py
include cluster_pack/_version.py
5 changes: 2 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# cluster-pack

cluster-pack is a library on top of either [pex][pex] or [conda-pack][conda-pack] to make your Python code easily available on a cluster.
cluster-pack is a library on top of either [pex][pex] to make your Python code easily available on a cluster.

Its goal is to make your prod/dev Python code & libraries easiliy available on any cluster. cluster-pack supports HDFS/S3 as a distributed storage.

Expand Down Expand Up @@ -32,7 +32,7 @@ cluster-pack supports Python ≥3.7.

## Features

- Ships a package with all the dependencies from your current virtual environment or your conda environment
- Ships a package with all the dependencies from your current virtual environment

- Stores metadata for an environment

Expand All @@ -59,6 +59,5 @@ cluster-pack supports Python ≥3.7.
2) [Docker with PySpark on S3](https://github.com/criteo/cluster-pack/blob/master/examples/spark-with-S3/README.md)

[pex]: https://github.com/pantsbuild/pex
[conda-pack]: https://github.com/conda/conda-pack
[editable_installs_mode]: https://pip.pypa.io/en/stable/reference/pip_install/#editable-installs
[skein]: https://jcrist.github.io/skein/
9 changes: 2 additions & 7 deletions cluster_pack/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@
from cluster_pack.uploader import (
upload_env,
upload_zip,
upload_spec
)
from cluster_pack.uploader import upload_env, upload_zip, upload_spec

from cluster_pack.packaging import (
zip_path,
Expand All @@ -11,9 +7,8 @@
get_default_fs,
detect_packer_from_file,
Packer,
CONDA_PACKER,
PEX_PACKER,
get_pyenv_usage_from_archive
get_pyenv_usage_from_archive,
)

from cluster_pack.skein import skein_launcher as yarn_launcher
154 changes: 95 additions & 59 deletions cluster_pack/_version.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

# This file helps to compute a version number in source trees obtained from
# git-archive tarball (such as those provided by githubs download-from-tag
# feature). Distribution tarballs (built by setup.py sdist) and build
Expand Down Expand Up @@ -58,28 +57,32 @@ class NotThisMethod(Exception):

def register_vcs_handler(vcs, method): # decorator
"""Decorator to mark a method as the handler for a particular VCS."""

def decorate(f):
"""Store f in HANDLERS[vcs][method]."""
if vcs not in HANDLERS:
HANDLERS[vcs] = {}
HANDLERS[vcs][method] = f
return f

return decorate


def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
env=None):
def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None):
"""Call the given command(s)."""
assert isinstance(commands, list)
p = None
for c in commands:
try:
dispcmd = str([c] + args)
# remember shell=False, so use git.cmd on windows, not just git
p = subprocess.Popen([c] + args, cwd=cwd, env=env,
stdout=subprocess.PIPE,
stderr=(subprocess.PIPE if hide_stderr
else None))
p = subprocess.Popen(
[c] + args,
cwd=cwd,
env=env,
stdout=subprocess.PIPE,
stderr=(subprocess.PIPE if hide_stderr else None),
)
break
except EnvironmentError:
e = sys.exc_info()[1]
Expand Down Expand Up @@ -116,16 +119,22 @@ def versions_from_parentdir(parentdir_prefix, root, verbose):
for i in range(3):
dirname = os.path.basename(root)
if dirname.startswith(parentdir_prefix):
return {"version": dirname[len(parentdir_prefix):],
"full-revisionid": None,
"dirty": False, "error": None, "date": None}
return {
"version": dirname[len(parentdir_prefix) :],
"full-revisionid": None,
"dirty": False,
"error": None,
"date": None,
}
else:
rootdirs.append(root)
root = os.path.dirname(root) # up a level

if verbose:
print("Tried directories %s but none started with prefix %s" %
(str(rootdirs), parentdir_prefix))
print(
"Tried directories %s but none started with prefix %s"
% (str(rootdirs), parentdir_prefix)
)
raise NotThisMethod("rootdir doesn't start with parentdir_prefix")


Expand Down Expand Up @@ -181,7 +190,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
# starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
# just "foo-1.0". If we see a "tag: " prefix, prefer those.
TAG = "tag: "
tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)])
if not tags:
# Either we're using git < 1.8.3, or there really are no tags. We use
# a heuristic: assume all version tags have a digit. The old git %d
Expand All @@ -190,27 +199,34 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
# between branches and tags. By ignoring refnames without digits, we
# filter out many common branch names like "release" and
# "stabilization", as well as "HEAD" and "master".
tags = set([r for r in refs if re.search(r'\d', r)])
tags = set([r for r in refs if re.search(r"\d", r)])
if verbose:
print("discarding '%s', no digits" % ",".join(refs - tags))
if verbose:
print("likely tags: %s" % ",".join(sorted(tags)))
for ref in sorted(tags):
# sorting will prefer e.g. "2.0" over "2.0rc1"
if ref.startswith(tag_prefix):
r = ref[len(tag_prefix):]
r = ref[len(tag_prefix) :]
if verbose:
print("picking %s" % r)
return {"version": r,
"full-revisionid": keywords["full"].strip(),
"dirty": False, "error": None,
"date": date}
return {
"version": r,
"full-revisionid": keywords["full"].strip(),
"dirty": False,
"error": None,
"date": date,
}
# no suitable tags, so version is "0+unknown", but full hex is still there
if verbose:
print("no suitable tags, using unknown + full revision id")
return {"version": "0+unknown",
"full-revisionid": keywords["full"].strip(),
"dirty": False, "error": "no suitable tags", "date": None}
return {
"version": "0+unknown",
"full-revisionid": keywords["full"].strip(),
"dirty": False,
"error": "no suitable tags",
"date": None,
}


@register_vcs_handler("git", "pieces_from_vcs")
Expand All @@ -225,19 +241,27 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
if sys.platform == "win32":
GITS = ["git.cmd", "git.exe"]

out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root,
hide_stderr=True)
out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True)
if rc != 0:
if verbose:
print("Directory %s not under git control" % root)
raise NotThisMethod("'git rev-parse --git-dir' returned error")

# if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
# if there isn't one, this yields HEX[-dirty] (no NUM)
describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty",
"--always", "--long",
"--match", "%s*" % tag_prefix],
cwd=root)
describe_out, rc = run_command(
GITS,
[
"describe",
"--tags",
"--dirty",
"--always",
"--long",
"--match",
"%s*" % tag_prefix,
],
cwd=root,
)
# --long was added in git-1.5.5
if describe_out is None:
raise NotThisMethod("'git describe' failed")
Expand All @@ -260,17 +284,16 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
dirty = git_describe.endswith("-dirty")
pieces["dirty"] = dirty
if dirty:
git_describe = git_describe[:git_describe.rindex("-dirty")]
git_describe = git_describe[: git_describe.rindex("-dirty")]

# now we have TAG-NUM-gHEX or HEX

if "-" in git_describe:
# TAG-NUM-gHEX
mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe)
if not mo:
# unparseable. Maybe git-describe is misbehaving?
pieces["error"] = ("unable to parse git-describe output: '%s'"
% describe_out)
pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out
return pieces

# tag
Expand All @@ -279,10 +302,12 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
if verbose:
fmt = "tag '%s' doesn't start with prefix '%s'"
print(fmt % (full_tag, tag_prefix))
pieces["error"] = ("tag '%s' doesn't start with prefix '%s'"
% (full_tag, tag_prefix))
pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % (
full_tag,
tag_prefix,
)
return pieces
pieces["closest-tag"] = full_tag[len(tag_prefix):]
pieces["closest-tag"] = full_tag[len(tag_prefix) :]

# distance: number of commits since tag
pieces["distance"] = int(mo.group(2))
Expand All @@ -293,13 +318,13 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
else:
# HEX: no tags
pieces["closest-tag"] = None
count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"],
cwd=root)
count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root)
pieces["distance"] = int(count_out) # total number of commits

# commit date: see ISO-8601 comment in git_versions_from_keywords()
date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"],
cwd=root)[0].strip()
date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[
0
].strip()
pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1)

return pieces
Expand Down Expand Up @@ -330,8 +355,7 @@ def render_pep440(pieces):
rendered += ".dirty"
else:
# exception #1
rendered = "0+untagged.%d.g%s" % (pieces["distance"],
pieces["short"])
rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"])
if pieces["dirty"]:
rendered += ".dirty"
return rendered
Expand Down Expand Up @@ -445,11 +469,13 @@ def render_git_describe_long(pieces):
def render(pieces, style):
"""Render the given version pieces into the requested style."""
if pieces["error"]:
return {"version": "unknown",
"full-revisionid": pieces.get("long"),
"dirty": None,
"error": pieces["error"],
"date": None}
return {
"version": "unknown",
"full-revisionid": pieces.get("long"),
"dirty": None,
"error": pieces["error"],
"date": None,
}

if not style or style == "default":
style = "pep440" # the default
Expand All @@ -469,9 +495,13 @@ def render(pieces, style):
else:
raise ValueError("unknown style '%s'" % style)

return {"version": rendered, "full-revisionid": pieces["long"],
"dirty": pieces["dirty"], "error": None,
"date": pieces.get("date")}
return {
"version": rendered,
"full-revisionid": pieces["long"],
"dirty": pieces["dirty"],
"error": None,
"date": pieces.get("date"),
}


def get_versions():
Expand All @@ -485,8 +515,7 @@ def get_versions():
verbose = cfg.verbose

try:
return git_versions_from_keywords(get_keywords(), cfg.tag_prefix,
verbose)
return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose)
except NotThisMethod:
pass

Expand All @@ -495,13 +524,16 @@ def get_versions():
# versionfile_source is the relative path from the top of the source
# tree (where the .git directory might live) to this file. Invert
# this to find the root from __file__.
for i in cfg.versionfile_source.split('/'):
for i in cfg.versionfile_source.split("/"):
root = os.path.dirname(root)
except NameError:
return {"version": "0+unknown", "full-revisionid": None,
"dirty": None,
"error": "unable to find root of source tree",
"date": None}
return {
"version": "0+unknown",
"full-revisionid": None,
"dirty": None,
"error": "unable to find root of source tree",
"date": None,
}

try:
pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose)
Expand All @@ -515,6 +547,10 @@ def get_versions():
except NotThisMethod:
pass

return {"version": "0+unknown", "full-revisionid": None,
"dirty": None,
"error": "unable to compute version", "date": None}
return {
"version": "0+unknown",
"full-revisionid": None,
"dirty": None,
"error": "unable to compute version",
"date": None,
}
Loading
Loading