FujitsuResearch · sotanengel · May 7, 2026 · May 13, 2026 · May 18, 2026 · May 18, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,24 @@
 # Change log
 
+## [v1.1.1] 2026-05-19
+
+### New Feature: Quantization progress logging
+
+- Added `QuantizationProgressTracker` (`onecomp/utils/quantization_progress.py`) that emits a single `[progress]` INFO line per completed step with done/total, percentage, elapsed time, and a linear ETA estimate; supports an optional `thread_safe=True` mode for multi-GPU quantization
+- Added `report_progress: bool = True` flag to `Runner.__init__` (`onecomp/runner.py`) and to the underlying entry points `run_chunked_quantization` (`onecomp/runner_methods/chunked_quantization.py`), `run_multi_gpu_quantization` / `run_quantization_phase` (`onecomp/runner_methods/multi_gpu_quantization.py`), `run_quantize_with_qep` (`onecomp/qep/_quantize_with_qep.py`), and `run_quantize_with_qep_arch` (`onecomp/qep/_quantize_with_qep_arch.py`) so long quantization runs (calibration, chunked, multi-GPU, QEP) report progress by default; pass `report_progress=False` for quiet runs
+
+### Bug Fixes
+
+- Raise a clear error when `Runner` is configured with `qep=True` and a quantizer that does not support QEP (currently `JointQ`). Previously the run failed deep inside `quantize_with_qep` / `adjust_weight` with a confusing low-level error. `Runner.check()` now reports, e.g., "Quantizer 'JointQ' (or one of its candidate quantizers) does not support QEP (Quantization Error Propagation). Set qep=False, or use a QEP-compatible quantizer (e.g., GPTQ, DBF, AutoBitQuantizer with QEP-compatible candidates)." Implementation: added `flag_qep_supported` (default `True`) on `Quantizer`, set to `False` on `JointQ`, and propagated via `AutoBitQuantizer._sync_flags` (only `True` when *all* candidate quantizers support QEP).
+
+### Tests
+
+- Added `tests/onecomp/test_runner_check.py` covering the new `qep=True` validation path: JointQ + qep=True raises a clear `ValueError`, while JointQ + qep=False and GPTQ + qep=True both pass `Runner.check()`.
+
+### New Contributors
+
+- [@sotanengel](https://github.com/sotanengel) made their first contribution in [#13](https://github.com/FujitsuResearch/OneCompression/pull/13)
+
 ## [v1.1.0] 2026-04-16
 
 ### Gemma 3 / Gemma 4 & VLM Support

diff --git a/onecomp/__version__.py b/onecomp/__version__.py
@@ -6,4 +6,4 @@
 
 """
 
-__version__ = "1.1.0"
+__version__ = "1.1.1"
diff --git a/onecomp/cli.py b/onecomp/cli.py
@@ -63,6 +63,14 @@ def main():
         default="auto",
         help='save directory (default: auto-generated, "none" to skip)',
     )
+    parser.add_argument(
+        "--check-env",
+        action="store_true",
+        help=(
+            "Print an environment and memory report before quantization. "
+            "Exits with code 1 if OOM risk is 'danger'."
+        ),
+    )
     parser.add_argument(
         "--version",
         action="version",
@@ -76,6 +84,23 @@ def main():
     # Lazy import to keep --help fast
     from .runner import Runner  # pylint: disable=import-outside-toplevel
 
+    if args.check_env:
+        import sys  # pylint: disable=import-outside-toplevel
+        from .utils.vram_estimator import (  # pylint: disable=import-outside-toplevel
+            check_environment,
+            print_env_report,
+        )
+
+        env_result = check_environment(
+            args.model_id,
+            total_vram_gb=args.total_vram_gb,
+            group_size=args.groupsize,
+            save_dir=save_dir if isinstance(save_dir, str) and save_dir != "auto" else None,
+        )
+        print_env_report(env_result, total_vram_gb_override=args.total_vram_gb)
+        if env_result.risk == "danger":
+            sys.exit(1)
+
     Runner.auto_run(
         model_id=args.model_id,
         wbits=args.wbits,

diff --git a/onecomp/qep/_quantize_with_qep.py b/onecomp/qep/_quantize_with_qep.py
@@ -27,6 +27,7 @@
 from onecomp.qep._qep_config import QEPConfig
 from onecomp.quantizer._quantizer import Quantizer
 from onecomp.utils import capture_input_activations
+from onecomp.utils.quantization_progress import QuantizationProgressTracker
 
 logger = getLogger(__name__)
 
@@ -36,6 +37,8 @@ def run_quantize_with_qep(
     quantizer: Quantizer,
     qep_config: QEPConfig,
     calibration_config: CalibrationConfig,
+    *,
+    report_progress: bool = True,
 ):
     """Run quantization with Quantization Error Propagation (QEP).
 
@@ -51,6 +54,7 @@ def run_quantize_with_qep(
         qep_config (QEPConfig): Configuration for QEP
             (percdamp, perccorr, exclude_layer_keywords).
         calibration_config (CalibrationConfig): Calibration parameters.
+        report_progress (bool): When True, log ``[progress]`` with ETA per layer.
 
     """
     model = model_config.load_model()
@@ -80,6 +84,14 @@ def run_quantize_with_qep(
 
     logger.info("Quantizing the model using %s", quantizer.name)
 
+    progress = None
+    if report_progress:
+        progress = QuantizationProgressTracker(
+            logger,
+            len(quantizer.module_to_name),
+            "QEP quantization (general, per layer)",
+        )
+
     # 2. For each target layer, perform the following sequentially
     for module, name in quantizer.module_to_name.items():
 
@@ -114,6 +126,8 @@ def run_quantize_with_qep(
 
         # 2-4. Free memory
         del quant_input_activation
+        if progress is not None:
+            progress.step_complete(name)
 
     del original_input_activations
     quantizer.execute_post_processing()
diff --git a/onecomp/qep/_quantize_with_qep_arch.py b/onecomp/qep/_quantize_with_qep_arch.py
@@ -33,6 +33,7 @@
     move_kwargs_to_device,
     expand_kwargs_batch,
 )
+from onecomp.utils.quantization_progress import QuantizationProgressTracker
 
 logger = getLogger(__name__)
 
@@ -143,6 +144,7 @@ def compute_hessian_and_crossterm(
     def make_hook(name):
         def hook(module, inp, out):
             dest[name] = inp[0] if isinstance(inp, tuple) else inp
+
         return hook
 
     handlers = [
@@ -213,6 +215,7 @@ def _compute_per_module_hessians(
     def _make_hook(key):
         def hook(_, inp, __):
             dest[key] = inp[0] if isinstance(inp, tuple) else inp
+
         return hook
 
     handlers = [m.register_forward_hook(_make_hook(i)) for i, m in enumerate(modules)]
@@ -251,10 +254,7 @@ def hook(_, inp, __):
     for h in handlers:
         h.remove()
 
-    return {
-        modules[i]: (hessians[i] if nsamples[i] > 0 else None)
-        for i in range(len(modules))
-    }
+    return {modules[i]: (hessians[i] if nsamples[i] > 0 else None) for i in range(len(modules))}
 
 
 @torch.no_grad()
@@ -263,6 +263,8 @@ def run_quantize_with_qep_arch(
     quantizer: Quantizer,
     qep_config: QEPConfig,
     calibration_config: CalibrationConfig,
+    *,
+    report_progress: bool = True,
 ):
     """Run architecture-aware quantization with QEP.
 
@@ -279,6 +281,7 @@ def run_quantize_with_qep_arch(
         qep_config (QEPConfig): Configuration for QEP
             (percdamp, perccorr, exclude_layer_keywords).
         calibration_config (CalibrationConfig): Calibration parameters.
+        report_progress (bool): When True, log ``[progress]`` with ETA per target layer.
 
     """
 
@@ -318,6 +321,14 @@ def run_quantize_with_qep_arch(
         name for module, name in quantizer.module_to_name.items() if module in block_modules
     }
 
+    progress = None
+    if report_progress:
+        progress = QuantizationProgressTracker(
+            logger,
+            len(remaining_targets),
+            "QEP quantization (architecture-aware)",
+        )
+
     # 2. For each target transformer block, perform the following sequentially
     for block_idx, block in enumerate(blocks):
 
@@ -365,9 +376,7 @@ def run_quantize_with_qep_arch(
             targets = [m for m in group_q if m in quantizer.module_to_name]
             if not targets:
                 continue
-            is_expert = any(
-                ".experts." in quantizer.module_to_name[m] for m in targets
-            )
+            is_expert = any(".experts." in quantizer.module_to_name[m] for m in targets)
             if is_expert:
                 expert_modules_q.extend(targets)
             else:
@@ -442,6 +451,8 @@ def run_quantize_with_qep_arch(
                         name,
                     )
                 remaining_targets.discard(name)
+                if progress is not None:
+                    progress.step_complete(name)
 
         # 4. Process MoE expert layers with per-module Hessians (no cross-term)
         if expert_modules_q:
@@ -451,7 +462,12 @@ def run_quantize_with_qep_arch(
                 len(expert_modules_q),
             )
             expert_hessians = _compute_per_module_hessians(
-                block_q, expert_modules_q, inps_q, kwargs, batch_size, device,
+                block_q,
+                expert_modules_q,
+                inps_q,
+                kwargs,
+                batch_size,
+                device,
             )
             for module_q in expert_modules_q:
                 name = quantizer.module_to_name[module_q]
@@ -462,6 +478,8 @@ def run_quantize_with_qep_arch(
                         name,
                     )
                     remaining_targets.discard(name)
+                    if progress is not None:
+                        progress.step_complete(f"{name} (skipped, no tokens)")
                     continue
 
                 logger.info(
@@ -489,6 +507,8 @@ def run_quantize_with_qep_arch(
                         name,
                     )
                 remaining_targets.discard(name)
+                if progress is not None:
+                    progress.step_complete(name)
 
         # forward input to the next block
         inps_q = forward_input(inps_q, block_q, kwargs, batch_size, device)

diff --git a/onecomp/quantizer/_quantizer.py b/onecomp/quantizer/_quantizer.py
@@ -174,6 +174,7 @@ class Quantizer(metaclass=ABCMeta):
     flag_calibration: bool = False
     flag_hessian: bool = False
     flag_xtx: bool = False  # Whether X^T X is needed (e.g., JointQ)
+    flag_qep_supported: bool = True
 
     def __post_init__(self):
         """__post_init__ method"""

diff --git a/onecomp/quantizer/autobit/_autobit.py b/onecomp/quantizer/autobit/_autobit.py
@@ -393,6 +393,11 @@ def _sync_flags(self):
             self.flag_calibration = any(q.flag_calibration for q in self.quantizers)
             self.flag_hessian = any(q.flag_hessian for q in self.quantizers)
             self.flag_xtx = any(q.flag_xtx for q in self.quantizers)
+            # AutoBit supports QEP only when *all* candidate quantizers support it
+            # (the per-layer assignment may dispatch to any child quantizer).
+            self.flag_qep_supported = all(
+                q.flag_qep_supported for q in self.quantizers
+            )
 
     def _validate_manual_fused_consistency(self):
         """Check that manual keyword rules don't split fused groups."""

diff --git a/onecomp/quantizer/jointq/_jointq.py b/onecomp/quantizer/jointq/_jointq.py
@@ -250,6 +250,9 @@ class JointQ(Quantizer):
     flag_calibration: bool = True
     flag_hessian: bool = False
     flag_xtx: bool = True
+    # JointQ does not yet support the generic QEP pipeline.
+    # Planned for a future release.
+    flag_qep_supported: bool = False
     hessian_dtype: torch.dtype = torch.float64
 
     # Parameters for the JointQ quantizer
Original file line number	Diff line number	Diff line change
Expand Up		@@ -6,4 +6,4 @@

		"""

		__version__ = "1.1.0"
		__version__ = "1.1.1"