|
49 | 49 | from zenml.config.base_settings import BaseSettings |
50 | 50 | from zenml.constants import ( |
51 | 51 | METADATA_ORCHESTRATOR_RUN_ID, |
| 52 | + ORCHESTRATOR_DOCKER_IMAGE_KEY, |
52 | 53 | ) |
53 | 54 | from zenml.enums import ExecutionMode, ExecutionStatus, StackComponentType |
54 | 55 | from zenml.integrations.kubernetes.constants import ( |
|
80 | 81 | from zenml.stack import StackValidator |
81 | 82 |
|
82 | 83 | if TYPE_CHECKING: |
| 84 | + from zenml.config.step_run_info import StepRunInfo |
83 | 85 | from zenml.models import ( |
84 | 86 | PipelineRunResponse, |
85 | 87 | PipelineSnapshotBase, |
@@ -110,7 +112,10 @@ def should_build_pipeline_image( |
110 | 112 | settings = cast( |
111 | 113 | KubernetesOrchestratorSettings, self.get_settings(snapshot) |
112 | 114 | ) |
113 | | - return settings.always_build_pipeline_image |
| 115 | + if settings.always_build_pipeline_image: |
| 116 | + return True |
| 117 | + else: |
| 118 | + return super().should_build_pipeline_image(snapshot) |
114 | 119 |
|
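With the change above, the Kubernetes orchestrator only forces a build of the dedicated pipeline image when `always_build_pipeline_image` is enabled and otherwise defers to the base orchestrator's decision. A minimal usage sketch follows; it assumes the documented `KubernetesOrchestratorSettings` import path and the `orchestrator.kubernetes` settings key, and the pipeline name is made up:

```python
from zenml import pipeline
from zenml.integrations.kubernetes.flavors import KubernetesOrchestratorSettings

# Force building the pipeline image even when the base orchestrator would
# otherwise skip it (field name taken from the settings read above).
k8s_settings = KubernetesOrchestratorSettings(always_build_pipeline_image=True)


@pipeline(settings={"orchestrator.kubernetes": k8s_settings})
def my_pipeline() -> None:
    ...
```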
115 | 120 | def get_kube_client( |
116 | 121 | self, incluster: Optional[bool] = None |
@@ -446,8 +451,6 @@ def submit_pipeline( |
446 | 451 | KubernetesOrchestratorSettings, self.get_settings(snapshot) |
447 | 452 | ) |
448 | 453 |
|
449 | | - assert stack.container_registry |
450 | | - |
451 | 454 | # Get Docker image for the orchestrator pod |
452 | 455 | try: |
453 | 456 | image = self.get_image(snapshot=snapshot) |
@@ -656,6 +659,285 @@ def _wait_for_run_to_finish() -> None: |
656 | 659 | ) |
657 | 660 | return None |
658 | 661 |
|
| 662 | + def submit_dynamic_pipeline( |
| 663 | + self, |
| 664 | + snapshot: "PipelineSnapshotResponse", |
| 665 | + stack: "Stack", |
| 666 | + environment: Dict[str, str], |
| 667 | + placeholder_run: Optional["PipelineRunResponse"] = None, |
| 668 | + ) -> Optional[SubmissionResult]: |
| 669 | + """Submits a dynamic pipeline to the orchestrator.""" |
| 670 | + from zenml.pipelines.dynamic.entrypoint_configuration import ( |
| 671 | + DynamicPipelineEntrypointConfiguration, |
| 672 | + ) |
| 673 | + |
| 674 | + pipeline_name = snapshot.pipeline_configuration.name |
| 675 | + settings = cast( |
| 676 | + KubernetesOrchestratorSettings, self.get_settings(snapshot) |
| 677 | + ) |
| 678 | + image = self.get_image(snapshot=snapshot) |
| 679 | + |
| 680 | + command = ( |
| 681 | + DynamicPipelineEntrypointConfiguration.get_entrypoint_command() |
| 682 | + ) |
| 683 | + args = DynamicPipelineEntrypointConfiguration.get_entrypoint_arguments( |
| 684 | + snapshot_id=snapshot.id, |
| 685 | + run_id=placeholder_run.id if placeholder_run else None, |
| 686 | + ) |
| 687 | + |
| 688 | + # Authorize pod to run Kubernetes commands inside the cluster. |
| 689 | + service_account_name = self._get_service_account_name(settings) |
| 690 | + |
| 691 | + # Set default minimum resource requests for the orchestrator pod if the |
| 692 | + # user has not specified any. The orchestrator pod consumes some memory |
| 693 | + # itself and, without a request, it may be scheduled on a node with too |
| 694 | + # little free memory, degrading or even crashing that node due to memory |
| 695 | + # pressure. |
| 696 | + orchestrator_pod_settings = kube_utils.apply_default_resource_requests( |
| 697 | + memory="400Mi", |
| 698 | + cpu="100m", |
| 699 | + pod_settings=settings.orchestrator_pod_settings, |
| 700 | + ) |
| 701 | + |
| 702 | + if self.config.pass_zenml_token_as_secret: |
| 703 | + secret_name = self.get_token_secret_name(snapshot.id) |
| 704 | + token = environment.pop("ZENML_STORE_API_TOKEN") |
| 705 | + kube_utils.create_or_update_secret( |
| 706 | + core_api=self._k8s_core_api, |
| 707 | + namespace=self.config.kubernetes_namespace, |
| 708 | + secret_name=secret_name, |
| 709 | + data={KUBERNETES_SECRET_TOKEN_KEY_NAME: token}, |
| 710 | + ) |
| 711 | + orchestrator_pod_settings.env.append( |
| 712 | + { |
| 713 | + "name": "ZENML_STORE_API_TOKEN", |
| 714 | + "valueFrom": { |
| 715 | + "secretKeyRef": { |
| 716 | + "name": secret_name, |
| 717 | + "key": KUBERNETES_SECRET_TOKEN_KEY_NAME, |
| 718 | + } |
| 719 | + }, |
| 720 | + } |
| 721 | + ) |
| 722 | + |
| 723 | + orchestrator_pod_labels = { |
| 724 | + "pipeline": kube_utils.sanitize_label(pipeline_name), |
| 725 | + } |
| 726 | + |
| 727 | + if placeholder_run: |
| 728 | + orchestrator_pod_labels["run_id"] = kube_utils.sanitize_label( |
| 729 | + str(placeholder_run.id) |
| 730 | + ) |
| 731 | + orchestrator_pod_labels["run_name"] = kube_utils.sanitize_label( |
| 732 | + placeholder_run.name |
| 733 | + ) |
| 734 | + |
| 735 | + pod_manifest = build_pod_manifest( |
| 736 | + pod_name=None, |
| 737 | + image_name=image, |
| 738 | + command=command, |
| 739 | + args=args, |
| 740 | + privileged=False, |
| 741 | + pod_settings=orchestrator_pod_settings, |
| 742 | + service_account_name=service_account_name, |
| 743 | + env=environment, |
| 744 | + labels=orchestrator_pod_labels, |
| 745 | + mount_local_stores=self.config.is_local, |
| 746 | + termination_grace_period_seconds=settings.pod_stop_grace_period, |
| 747 | + ) |
| 748 | + |
| 749 | + pod_failure_policy = settings.pod_failure_policy or { |
| 750 | + # These rules are applied sequentially. This means any failure in |
| 751 | + # the main container will count towards the max retries. Any other |
| 752 | + # disruption will not count towards the max retries. |
| 753 | + "rules": [ |
| 754 | + # If the main container fails, we count it towards the max |
| 755 | + # retries. |
| 756 | + { |
| 757 | + "action": "Count", |
| 758 | + "onExitCodes": { |
| 759 | + "containerName": "main", |
| 760 | + "operator": "NotIn", |
| 761 | + "values": [0], |
| 762 | + }, |
| 763 | + }, |
| 764 | + # If the pod is interrupted at any other time, we don't count |
| 765 | + # it as a retry |
| 766 | + { |
| 767 | + "action": "Ignore", |
| 768 | + "onPodConditions": [ |
| 769 | + { |
| 770 | + "type": "DisruptionTarget", |
| 771 | + "status": "True", |
| 772 | + } |
| 773 | + ], |
| 774 | + }, |
| 775 | + ] |
| 776 | + } |
| 777 | + |
| 778 | + job_name = settings.job_name_prefix or "" |
| 779 | + random_prefix = "".join(random.choices("0123456789abcdef", k=8)) |
| 780 | + job_name += f"-{random_prefix}-{snapshot.pipeline_configuration.name}" |
| 781 | + # The job name will be used as a label on the pods, so we need to make |
| 782 | + # sure it doesn't exceed the label length limit |
| 783 | + job_name = kube_utils.sanitize_label(job_name) |
| 784 | + |
| 785 | + job_manifest = build_job_manifest( |
| 786 | + job_name=job_name, |
| 787 | + pod_template=pod_template_manifest_from_pod(pod_manifest), |
| 788 | + backoff_limit=settings.orchestrator_job_backoff_limit, |
| 789 | + ttl_seconds_after_finished=settings.ttl_seconds_after_finished, |
| 790 | + active_deadline_seconds=settings.active_deadline_seconds, |
| 791 | + pod_failure_policy=pod_failure_policy, |
| 792 | + labels=orchestrator_pod_labels, |
| 793 | + annotations={ |
| 794 | + ORCHESTRATOR_ANNOTATION_KEY: str(self.id), |
| 795 | + }, |
| 796 | + ) |
| 797 | + |
| 798 | + if snapshot.schedule: |
| 799 | + raise RuntimeError("Dynamic pipelines cannot be scheduled yet.") |
| 800 | + else: |
| 801 | + try: |
| 802 | + kube_utils.create_job( |
| 803 | + batch_api=self._k8s_batch_api, |
| 804 | + namespace=self.config.kubernetes_namespace, |
| 805 | + job_manifest=job_manifest, |
| 806 | + ) |
| 807 | + except Exception as e: |
| 808 | + if self.config.pass_zenml_token_as_secret: |
| 809 | + secret_name = self.get_token_secret_name(snapshot.id) |
| 810 | + try: |
| 811 | + kube_utils.delete_secret( |
| 812 | + core_api=self._k8s_core_api, |
| 813 | + namespace=self.config.kubernetes_namespace, |
| 814 | + secret_name=secret_name, |
| 815 | + ) |
| 816 | + except Exception as cleanup_error: |
| 817 | + logger.error( |
| 818 | + "Error cleaning up secret %s: %s", |
| 819 | + secret_name, |
| 820 | + cleanup_error, |
| 821 | + ) |
| 822 | + raise e |
| 823 | + |
| 824 | + if settings.synchronous: |
| 825 | + |
| 826 | + def _wait_for_run_to_finish() -> None: |
| 827 | + logger.info("Waiting for orchestrator job to finish...") |
| 828 | + kube_utils.wait_for_job_to_finish( |
| 829 | + batch_api=self._k8s_batch_api, |
| 830 | + core_api=self._k8s_core_api, |
| 831 | + namespace=self.config.kubernetes_namespace, |
| 832 | + job_name=job_name, |
| 833 | + backoff_interval=settings.job_monitoring_interval, |
| 834 | + fail_on_container_waiting_reasons=settings.fail_on_container_waiting_reasons, |
| 835 | + stream_logs=True, |
| 836 | + ) |
| 837 | + |
| 838 | + return SubmissionResult( |
| 839 | + wait_for_completion=_wait_for_run_to_finish, |
| 840 | + ) |
| 841 | + else: |
| 842 | + logger.info( |
| 843 | + f"Orchestrator job `{job_name}` started. " |
| 844 | + f"Run the following command to inspect the logs: " |
| 845 | + f"`kubectl -n {self.config.kubernetes_namespace} logs " |
| 846 | + f"job/{job_name}`" |
| 847 | + ) |
| 848 | + return None |
| 849 | + |
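For reference, the knobs consumed by `submit_dynamic_pipeline` (`synchronous`, `job_name_prefix`, `orchestrator_pod_settings`, `pod_failure_policy`, ...) are regular orchestrator settings. Below is a hedged configuration sketch: the concrete values are illustrative only, and the decorated pipeline is just a placeholder for however the dynamic pipeline is actually declared.

```python
from zenml import pipeline
from zenml.integrations.kubernetes.flavors import KubernetesOrchestratorSettings

# Illustrative values; the field names mirror the settings read by
# submit_dynamic_pipeline above.
k8s_settings = KubernetesOrchestratorSettings(
    synchronous=True,  # block until the orchestrator job finishes
    job_name_prefix="dynamic",  # prefix for the generated job name
    orchestrator_pod_settings={
        # Overrides the 400Mi / 100m defaults applied above.
        "resources": {"requests": {"memory": "800Mi", "cpu": "250m"}},
    },
)


@pipeline(settings={"orchestrator.kubernetes": k8s_settings})
def my_dynamic_pipeline() -> None:
    ...
```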
| 850 | + def run_dynamic_out_of_process_step( |
| 851 | + self, step_run_info: "StepRunInfo", environment: Dict[str, str] |
| 852 | + ) -> None: |
| 853 | + from zenml.step_operators.step_operator_entrypoint_configuration import ( |
| 854 | + StepOperatorEntrypointConfiguration, |
| 855 | + ) |
| 856 | + |
| 857 | + settings = cast( |
| 858 | + KubernetesOrchestratorSettings, self.get_settings(step_run_info) |
| 859 | + ) |
| 860 | + image_name = step_run_info.get_image(key=ORCHESTRATOR_DOCKER_IMAGE_KEY) |
| 861 | + command = StepOperatorEntrypointConfiguration.get_entrypoint_command() |
| 862 | + args = StepOperatorEntrypointConfiguration.get_entrypoint_arguments( |
| 863 | + step_name=step_run_info.pipeline_step_name, |
| 864 | + snapshot_id=step_run_info.snapshot_id, |
| 865 | + step_run_id=str(step_run_info.step_run_id), |
| 866 | + ) |
| 867 | + |
| 868 | + step_labels = { |
| 869 | + "run_id": kube_utils.sanitize_label(str(step_run_info.run_id)), |
| 870 | + "run_name": kube_utils.sanitize_label(str(step_run_info.run_name)), |
| 871 | + "pipeline": kube_utils.sanitize_label(step_run_info.pipeline.name), |
| 872 | + "step_name": kube_utils.sanitize_label( |
| 873 | + step_run_info.pipeline_step_name |
| 874 | + ), |
| 875 | + } |
| 876 | + step_annotations = { |
| 877 | + STEP_NAME_ANNOTATION_KEY: step_run_info.pipeline_step_name, |
| 878 | + } |
| 879 | + |
| 880 | + # Set a default minimum memory request for the step pod if the user |
| 881 | + # has not specified any. The step pod consumes some memory itself |
| 882 | + # and, without a request, it may be scheduled on a node with too |
| 883 | + # little free memory, degrading or even crashing that node due to |
| 884 | + # memory pressure. |
| 885 | + pod_settings = kube_utils.apply_default_resource_requests( |
| 886 | + memory="400Mi", |
| 887 | + pod_settings=settings.pod_settings, |
| 888 | + ) |
| 889 | + |
| 890 | + pod_manifest = build_pod_manifest( |
| 891 | + pod_name=None, |
| 892 | + image_name=image_name, |
| 893 | + command=command, |
| 894 | + args=args, |
| 895 | + env=environment, |
| 896 | + privileged=settings.privileged, |
| 897 | + pod_settings=pod_settings, |
| 898 | + service_account_name=settings.service_account_name, |
| 899 | + labels=step_labels, |
| 900 | + ) |
| 901 | + |
| 902 | + job_name = settings.job_name_prefix or "" |
| 903 | + random_prefix = "".join(random.choices("0123456789abcdef", k=8)) |
| 904 | + job_name += f"-{random_prefix}-{step_run_info.pipeline_step_name}-{step_run_info.pipeline.name}" |
| 905 | + # The job name will be used as a label on the pods, so we need to make |
| 906 | + # sure it doesn't exceed the label length limit |
| 907 | + job_name = kube_utils.sanitize_label(job_name) |
| 908 | + |
| 909 | + job_manifest = build_job_manifest( |
| 910 | + job_name=job_name, |
| 911 | + pod_template=pod_template_manifest_from_pod(pod_manifest), |
| 912 | + # The orchestrator already handles retries, so we don't need to |
| 913 | + # retry the step operator job. |
| 914 | + backoff_limit=0, |
| 915 | + ttl_seconds_after_finished=settings.ttl_seconds_after_finished, |
| 916 | + active_deadline_seconds=settings.active_deadline_seconds, |
| 917 | + labels=step_labels, |
| 918 | + annotations=step_annotations, |
| 919 | + ) |
| 920 | + |
| 921 | + kube_utils.create_job( |
| 922 | + batch_api=self._k8s_batch_api, |
| 923 | + namespace=self.config.kubernetes_namespace, |
| 924 | + job_manifest=job_manifest, |
| 925 | + ) |
| 926 | + |
| 927 | + logger.info( |
| 928 | + "Waiting for job `%s` to finish...", |
| 929 | + job_name, |
| 930 | + ) |
| 931 | + kube_utils.wait_for_job_to_finish( |
| 932 | + batch_api=self._k8s_batch_api, |
| 933 | + core_api=self._k8s_core_api, |
| 934 | + namespace=self.config.kubernetes_namespace, |
| 935 | + job_name=job_name, |
| 936 | + fail_on_container_waiting_reasons=settings.fail_on_container_waiting_reasons, |
| 937 | + stream_logs=True, |
| 938 | + ) |
| 939 | + logger.info("Job completed.") |
| 940 | + |
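Both methods above compose a job name from a prefix, a random hex string, and pipeline/step names, then pass it through `kube_utils.sanitize_label`, because Kubernetes label values are limited to 63 characters and a restricted character set. The sketch below is only an illustrative approximation of that kind of sanitization, not the actual `sanitize_label` implementation, which may differ in detail:

```python
import re


def sanitize_label_sketch(value: str) -> str:
    """Illustrative approximation of Kubernetes label-value sanitization.

    Label values may only contain alphanumerics, '-', '_' and '.', must
    start and end with an alphanumeric character, and are limited to 63
    characters.
    """
    value = value.lower()
    # Replace any character outside the allowed set with a dash.
    value = re.sub(r"[^a-z0-9._-]", "-", value)
    # Truncate to 63 characters and trim non-alphanumeric edges.
    return value[:63].strip("-._")
```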
659 | 941 | def _get_service_account_name( |
660 | 942 | self, settings: KubernetesOrchestratorSettings |
661 | 943 | ) -> str: |
|