From 3d3c718d552941f22dced979e9474e10eb1319b7 Mon Sep 17 00:00:00 2001
From: Atin Sood
Date: Tue, 26 Jul 2022 17:45:37 -0400
Subject: [PATCH] using ray sdk vs cli for byot

Replace the `ray job submit` CLI call in the BYOT guidebook with a Python
script (job_submission.py) that submits the job through
ray.job_submission.JobSubmissionClient. The script reads JOB_ID,
RAY_ADDRESS and CUSTOM_WORKING_DIR from the environment, and the
entrypoint, runtime_env and metadata from
${CUSTOM_WORKING_DIR}/config.yaml. pyyaml is added to the ray install
step because the script parses that YAML file.

---
 .../ml/codeflare/training/byot/index.md       |  9 +++-
 .../codeflare/training/byot/job_submission.py | 53 +++++++++++++++++++
 guidebooks/ml/ray/install/cli.md              |  2 +-
 3 files changed, 61 insertions(+), 3 deletions(-)
 create mode 100644 guidebooks/ml/codeflare/training/byot/job_submission.py

diff --git a/guidebooks/ml/codeflare/training/byot/index.md b/guidebooks/ml/codeflare/training/byot/index.md
index 009fc823..4bad1aec 100644
--- a/guidebooks/ml/codeflare/training/byot/index.md
+++ b/guidebooks/ml/codeflare/training/byot/index.md
@@ -19,10 +19,15 @@ Submit the job.
 export JOB_NAME=BYOT
 ```
 
-```shell
+```python
 ---
-exec: ray job submit --job-id ${JOB_ID} --no-wait --runtime-env ${CUSTOM_WORKING_DIR}/runtime-env.yaml --working-dir ${CUSTOM_WORKING_DIR} --address ${RAY_ADDRESS} -- python main.py
+#exec: ray job submit --job-id ${JOB_ID} --no-wait --runtime-env ${CUSTOM_WORKING_DIR}/runtime-env.yaml --working-dir ${CUSTOM_WORKING_DIR} --address ${RAY_ADDRESS} -- python main.py
+#the below command is equivalent of this command
+exec:
+    #assumes that all these variables are set already, and are cross checked via asserts in python
+    #JOB_ID=${JOB_ID} CUSTOM_WORKING_DIR=${CUSTOM_WORKING_DIR} RAY_ADDRESS=${RAY_ADDRESS}
 ---
+--8<-- "./job_submission.py"
 ```
 
 --8<-- "ml/ray/run/logs"
diff --git a/guidebooks/ml/codeflare/training/byot/job_submission.py b/guidebooks/ml/codeflare/training/byot/job_submission.py
new file mode 100644
index 00000000..03ac14bd
--- /dev/null
+++ b/guidebooks/ml/codeflare/training/byot/job_submission.py
@@ -0,0 +1,53 @@
+import os
+
+import yaml
+from ray.job_submission import JobSubmissionClient
+
+
+def execute_job():
+    """Submit the BYOT job through the Ray job submission SDK.
+
+    Reads JOB_ID, RAY_ADDRESS and CUSTOM_WORKING_DIR from the environment and
+    the entrypoint, runtime_env and metadata from
+    ${CUSTOM_WORKING_DIR}/config.yaml.
+    """
+    assigned_job_id = os.getenv("JOB_ID")
+    cluster_address = os.getenv("RAY_ADDRESS")
+    template_location = os.getenv("CUSTOM_WORKING_DIR")
+
+    assert assigned_job_id is not None, "JOB_ID cannot be none"
+    assert cluster_address is not None, "RAY_ADDRESS cannot be none"
+    assert template_location is not None, "CUSTOM_WORKING_DIR cannot be none"
+
+    config_file_location = f"{template_location}/config.yaml"
+    assert os.path.isfile(config_file_location), f"config file config.yaml should be present at {template_location}"
+
+    with open(config_file_location, "r") as config_file:
+        print(f"reading configuration from {config_file_location}")
+        config = yaml.safe_load(config_file)
+    validate_config(config)
+
+    # set the working directory to be what is provided by the user;
+    # "or {}" also covers a runtime_env key that is present but empty
+    config["runtime_env"] = config.get("runtime_env") or {}
+    config["runtime_env"]["working_dir"] = template_location
+
+    print(f"executing command ray job submit with job-id: {assigned_job_id} working_dir: {template_location} address: {cluster_address}")
+    client = JobSubmissionClient(address=cluster_address)
+    client.submit_job(
+        job_id=assigned_job_id,
+        entrypoint=config["entrypoint"],
+        runtime_env=config["runtime_env"],
+        # metadata is optional in config.yaml
+        metadata=config.get("metadata"),
+    )
+
+
+def validate_config(config):
+    """Check that config.yaml parsed to a mapping with a non-empty entrypoint."""
+    assert isinstance(config, dict), "config.yaml must contain a mapping"
+    assert config.get("entrypoint"), "entry point cannot be empty"
+
+
+if __name__ == "__main__":
+    execute_job()
diff --git a/guidebooks/ml/ray/install/cli.md b/guidebooks/ml/ray/install/cli.md
index 29294b26..a20da699 100644
--- a/guidebooks/ml/ray/install/cli.md
+++ b/guidebooks/ml/ray/install/cli.md
@@ -4,7 +4,7 @@
 ---
 validate: which ray
 ---
-pip install -U "ray[default]"
+pip install -U "ray[default]" pyyaml
 ```
 
 ```shell