fix: correct help text for on_demand_checkpointing sync point count

RobotSail · RobotSail · commit a4e83d5e9117 · 2026-04-14T01:59:39.000Z
diff --git a/src/instructlab/training/main_ds.py b/src/instructlab/training/main_ds.py
@@ -1207,11 +1207,11 @@ def run_training(torch_args: TorchrunArgs, train_args: TrainingArgs) -> None:
         default=False,
         help=(
             "Enable on-demand full-state checkpointing triggered by Unix signals. "
-            "When enabled, workers check for a trigger file in /dev/shm at five "
-            "synchronization points per step (before/after each minibatch forward "
-            "and backward pass, and before/after the optimizer step) and collectively "
-            "save a distributed checkpoint before exiting. Designed for OpenShift AI / "
-            "KubeFlow preemption handling."
+            "When enabled, workers check for a trigger file in /dev/shm at "
+            "multiple synchronization points per step (before/after each "
+            "minibatch forward and backward pass, and before/after the optimizer "
+            "step) and collectively save a distributed checkpoint before exiting. "
+            "Designed for OpenShift AI / KubeFlow preemption handling."
         ),
     )
     parser.add_argument(