Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions guidebooks/ml/codeflare/training/byot/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,15 @@ Submit the job.
export JOB_NAME=BYOT
```

```shell
```python
---
exec: ray job submit --job-id ${JOB_ID} --no-wait --runtime-env ${CUSTOM_WORKING_DIR}/runtime-env.yaml --working-dir ${CUSTOM_WORKING_DIR} --address ${RAY_ADDRESS} -- python main.py
#exec: ray job submit --job-id ${JOB_ID} --no-wait --runtime-env ${CUSTOM_WORKING_DIR}/runtime-env.yaml --working-dir ${CUSTOM_WORKING_DIR} --address ${RAY_ADDRESS} -- python main.py
# the Python script below is equivalent to the commented-out command above
exec:
# assumes all of these variables are already set; they are cross-checked via asserts in Python
#JOB_ID=${JOB_ID} CUSTOM_WORKING_DIR=${CUSTOM_WORKING_DIR} RAY_ADDRESS=${RAY_ADDRESS}
---
--8<-- "./job_submission.py"
```

--8<-- "ml/ray/run/logs"
43 changes: 43 additions & 0 deletions guidebooks/ml/codeflare/training/byot/job_submission.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import os
from re import template
import yaml
from ray.job_submission import JobSubmissionClient

def execute_job():
    """Submit the job described by ``config.yaml`` using the Ray job SDK.

    Reads ``JOB_ID``, ``RAY_ADDRESS`` and ``CUSTOM_WORKING_DIR`` from the
    environment, loads ``config.yaml`` from the working directory, points the
    runtime environment's ``working_dir`` at that directory, and submits the
    configured entrypoint to the cluster at ``RAY_ADDRESS``.

    Raises:
        AssertionError: if a required environment variable is unset or
            ``config.yaml`` is not present in the working directory.
        Any exception raised by ``validate_config`` or by the Ray client
        propagates unchanged.
    """
    assigned_job_id = os.getenv("JOB_ID")
    cluster_address = os.getenv("RAY_ADDRESS")
    template_location = os.getenv("CUSTOM_WORKING_DIR")

    # Check the environment first so each failure names the variable that is
    # actually missing.
    assert assigned_job_id is not None, "JOB_ID cannot be none"
    assert cluster_address is not None, "RAY_ADDRESS cannot be none"
    assert template_location is not None, "CUSTOM_WORKING_DIR cannot be none"

    # Build the path only after the working directory is known to be set.
    config_file_location = f"{template_location}/config.yaml"
    assert os.path.isfile(config_file_location), (
        f"config file config.yaml should be present at {template_location}"
    )

    with open(config_file_location, "r") as config_file:
        print(f"reading configuration from {config_file_location}")
        config = yaml.safe_load(config_file)
    validate_config(config)

    # Set the working directory to be what is provided by the user. A missing
    # or empty ``runtime_env`` entry is treated as an empty mapping.
    runtime_env = config.get("runtime_env") or {}
    runtime_env["working_dir"] = template_location
    config["runtime_env"] = runtime_env

    print(
        f"executing command ray job submit with job-id: {assigned_job_id} "
        f"working_dir: {template_location} address: {cluster_address}"
    )
    client = JobSubmissionClient(address=cluster_address)
    client.submit_job(
        job_id=assigned_job_id,
        entrypoint=config["entrypoint"],
        runtime_env=runtime_env,
        # ``metadata`` is optional in the config; None is passed when absent.
        metadata=config.get("metadata"),
    )


def validate_config(config):
    """Check that the loaded job configuration names an entrypoint.

    Args:
        config: the object parsed from ``config.yaml``; expected to be a
            mapping with a non-empty ``entrypoint`` value.

    Raises:
        AssertionError: if ``config`` is not a mapping (for example an empty
            YAML file parsed to ``None``), or ``entrypoint`` is missing or
            empty.
    """
    # Use .get so a missing key reports the same clear message as an empty
    # value instead of surfacing as a bare KeyError.
    has_entrypoint = isinstance(config, dict) and bool(config.get("entrypoint"))
    assert has_entrypoint, "entry point cannot be empty"

# Submit the job only when this file is run as a script, not when imported.
if __name__ == "__main__":
    execute_job()
2 changes: 1 addition & 1 deletion guidebooks/ml/ray/install/cli.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
---
validate: which ray
---
pip install -U "ray[default]"
pip install -U "ray[default]" pyyaml
```

```shell
Expand Down