Skip to content

With MongoDb as job repository, restarting Abrupt Shutdown does not work as expected #4943

@loonis

Description

@loonis

Bug description
With MongoDB as the job repository, restarting a job after an abrupt shutdown does not work.

Environment
spring-boot-starter-batch 3.5.3 / MongoDB 8.0.12

Steps to reproduce
When a job is stopped properly with

jobOperator.stop(execId);

Here is the result in the database:

// collection: content_video.BATCH_JOB_EXECUTION
{
  "_id": "689231387c9236ffa8bc83d8",
  "jobExecutionId": "42",
  "jobInstanceId": "33",
  "jobParameters": {
    "run.id": {
      "value": "fixed",
      "type": "java.lang.String",
      "identifying": true
    }
  },
  "stepExecutions": [
    {
      "stepExecutionId": "40",
      "jobExecutionId": "42",
      "name": "ytVaoSyncStep",
      "status": "STOPPED",
      "readCount": "6000",
      "writeCount": "6000",
      "commitCount": "6",
      "rollbackCount": "0",
      "readSkipCount": "0",
      "processSkipCount": "0",
      "writeSkipCount": "0",
      "filterCount": "0",
      "startTime": "2025-08-05T16:28:40.824Z",
      "createTime": "2025-08-05T16:28:40.714Z",
      "endTime": "2025-08-05T16:29:38.126Z",
      "lastUpdated": "2025-08-05T16:29:38.156Z",
      "executionContext": {
        "map": {
          "batch.taskletType": "org.springframework.batch.core.step.item.ChunkOrientedTasklet",
          "batch.executed": true,
          "batch.version": "5.2.2",
          "batch.stepType": "org.springframework.batch.core.step.tasklet.TaskletStep",
          "yt.sync.channel.index": 5,
          "yt.sync.video.index": 35
        },
        "dirty": true
      },
      "exitStatus": {
        "exitCode": "STOPPED",
        "exitDescription": "org.springframework.batch.core.JobInterruptedException"
      },
      "terminateOnly": true
    }
  ],
  "status": "STOPPED",
  "startTime": "2025-08-05T16:28:40.533Z",
  "createTime": "2025-08-05T16:28:40.357Z",
  "endTime": "2025-08-05T16:29:38.280Z",
  "lastUpdated": "2025-08-05T16:29:38.280Z",
  "exitStatus": {
    "exitCode": "STOPPED",
    "exitDescription": "org.springframework.batch.core.JobInterruptedException"
  },
  "executionContext": {
    "map": {
      "batch.version": "5.2.2"
    },
    "dirty": true
  },
  "_class": "org.springframework.batch.core.repository.persistence.JobExecution"
}
// collection: content_video.BATCH_STEP_EXECUTION
{
  "_id": "689231387c9236ffa8bc83d9",
  "stepExecutionId": "40",
  "jobExecutionId": "42",
  "name": "ytVaoSyncStep",
  "status": "STOPPED",
  "readCount": "6000",
  "writeCount": "6000",
  "commitCount": "6",
  "rollbackCount": "0",
  "readSkipCount": "0",
  "processSkipCount": "0",
  "writeSkipCount": "0",
  "filterCount": "0",
  "startTime": "2025-08-05T16:28:40.824Z",
  "createTime": "2025-08-05T16:28:40.714Z",
  "endTime": "2025-08-05T16:29:38.126Z",
  "lastUpdated": "2025-08-05T16:29:38.156Z",
  "executionContext": {
    "map": {
      "batch.version": "5.2.2",
      "batch.taskletType": "org.springframework.batch.core.step.item.ChunkOrientedTasklet",
      "batch.stepType": "org.springframework.batch.core.step.tasklet.TaskletStep",
      "yt.sync.channel.index": 5,
      "yt.sync.video.index": 35
    },
    "dirty": true
  },
  "exitStatus": {
    "exitCode": "STOPPED",
    "exitDescription": "org.springframework.batch.core.JobInterruptedException"
  },
  "terminateOnly": true,
  "_class": "org.springframework.batch.core.repository.persistence.StepExecution"
}

As you can see, BATCH_JOB_EXECUTION contains the stepExecutions.

When restarting the job with the same parameters, it works as expected and the job restarts with the right context.

But when the job is stopped abruptly (server shutdown), the Spring documentation recommends setting the status to FAILED manually: https://spring.io/blog/2021/01/27/spring-batch-on-kubernetes-efficient-batch-processing-at-scale#4-gracefulabrupt-shutdown-implication

So we update the status from STARTED to FAILED

// collection: BATCH_JOB_EXECUTION

{
    "_id": "68930f7d06fc21b2e19f5b64",
    "jobExecutionId": "45",
    "jobInstanceId": "34",
    "jobParameters": {
      "runId": {
        "value": "1",
        "type": "java.lang.Long",
        "identifying": true
      }
    },
    "status": "FAILED",
    "startTime": "2025-08-06T08:17:01.498Z",
    "createTime": "2025-08-06T08:17:01.363Z",
    "lastUpdated": "2025-08-06T08:17:01.499Z",
    "exitStatus": {
      "exitCode": "FAILED",
      "exitDescription": "Marked STOPPED after abrupt shutdown"
    },
    "executionContext": {
      "map": {
        "batch.version": "5.2.2"
      },
      "dirty": true
    },
    "_class": "org.springframework.batch.core.repository.persistence.JobExecution",
    "endTime": "2025-08-06T08:18:14.865Z",
    "stepExecutions": []
  }
// collection: BATCH_STEP_EXECUTION

{
    "_id": "68930f8206fc21b2e19f5b65",
    "stepExecutionId": "43",
    "jobExecutionId": "45",
    "name": "ytVaoSyncStep",
    "status": "FAILED",
    "readCount": "4000",
    "writeCount": "4000",
    "commitCount": "4",
    "rollbackCount": "0",
    "readSkipCount": "0",
    "processSkipCount": "0",
    "writeSkipCount": "0",
    "filterCount": "0",
    "startTime": "2025-08-06T08:17:06.268Z",
    "createTime": "2025-08-06T08:17:06.169Z",
    "lastUpdated": "2025-08-06T08:17:50.327Z",
    "executionContext": {
      "map": {
        "batch.version": "5.2.2",
        "batch.taskletType": "org.springframework.batch.core.step.item.ChunkOrientedTasklet",
        "batch.stepType": "org.springframework.batch.core.step.tasklet.TaskletStep",
        "yt.sync.channel.index": 3,
        "yt.sync.video.index": 444
      },
      "dirty": true
    },
    "exitStatus": {
      "exitCode": "FAILED",
      "exitDescription": ""
    },
    "terminateOnly": false,
    "_class": "org.springframework.batch.core.repository.persistence.StepExecution"
  }

But the BATCH_JOB_EXECUTION.stepExecutions stays empty.

When restarting, the method org.springframework.batch.core.repository.dao.MongoStepExecutionDao#getLastStepExecution gets the stepExecution from jobExecution.getStepExecutions(), which is empty, and this causes problems.

I think that, like the JDBC version, it should fetch the stepExecution from the BATCH_STEP_EXECUTION collection and not from BATCH_JOB_EXECUTION.stepExecutions.

https://github.com/spring-projects/spring-batch/blob/b0eccd2b99c5525c0cbb07f79eb0b68056c39f6f/spring-batch-core/src/main/java/org/springframework/batch/core/repository/dao/jdbc/JdbcStepExecutionDao.java

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions