Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,24 @@
# Change log

## [v1.1.1] 2026-05-19

### New Feature: Quantization progress logging

- Added `QuantizationProgressTracker` (`onecomp/utils/quantization_progress.py`) that emits a single `[progress]` INFO line per completed step with done/total, percentage, elapsed time, and a linear ETA estimate; supports an optional `thread_safe=True` mode for multi-GPU quantization
- Added `report_progress: bool = True` flag to `Runner.__init__` (`onecomp/runner.py`) and to the underlying entry points `run_chunked_quantization` (`onecomp/runner_methods/chunked_quantization.py`), `run_multi_gpu_quantization` / `run_quantization_phase` (`onecomp/runner_methods/multi_gpu_quantization.py`), `run_quantize_with_qep` (`onecomp/qep/_quantize_with_qep.py`), and `run_quantize_with_qep_arch` (`onecomp/qep/_quantize_with_qep_arch.py`) so long quantization runs (calibration, chunked, multi-GPU, QEP) report progress by default; pass `report_progress=False` for quiet runs

### Bug Fixes

- Raise a clear error when `Runner` is configured with `qep=True` and a quantizer that does not support QEP (currently `JointQ`). Previously the run failed deep inside `quantize_with_qep` / `adjust_weight` with a confusing low-level error. `Runner.check()` now reports, e.g., "Quantizer 'JointQ' (or one of its candidate quantizers) does not support QEP (Quantization Error Propagation). Set qep=False, or use a QEP-compatible quantizer (e.g., GPTQ, DBF, AutoBitQuantizer with QEP-compatible candidates)." Implementation: added `flag_qep_supported` (default `True`) on `Quantizer`, set to `False` on `JointQ`, and propagated via `AutoBitQuantizer._sync_flags` (only `True` when *all* candidate quantizers support QEP).

### Tests

- Added `tests/onecomp/test_runner_check.py` covering the new `qep=True` validation path: `JointQ` with `qep=True` raises a clear `ValueError`, while `JointQ` with `qep=False` and `GPTQ` with `qep=True` both pass `Runner.check()`.

### New Contributors

- [@sotanengel](https://github.com/sotanengel) made their first contribution in [#13](https://github.com/FujitsuResearch/OneCompression/pull/13)

## [v1.1.0] 2026-04-16

### Gemma 3 / Gemma 4 & VLM Support
Expand Down
2 changes: 1 addition & 1 deletion onecomp/__version__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@

"""

__version__ = "1.1.0"
__version__ = "1.1.1"
25 changes: 25 additions & 0 deletions onecomp/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,14 @@ def main():
default="auto",
help='save directory (default: auto-generated, "none" to skip)',
)
parser.add_argument(
"--check-env",
action="store_true",
help=(
"Print an environment and memory report before quantization. "
"Exits with code 1 if OOM risk is 'danger'."
),
)
parser.add_argument(
"--version",
action="version",
Expand All @@ -76,6 +84,23 @@ def main():
# Lazy import to keep --help fast
from .runner import Runner # pylint: disable=import-outside-toplevel

if args.check_env:
import sys # pylint: disable=import-outside-toplevel
from .utils.vram_estimator import ( # pylint: disable=import-outside-toplevel
check_environment,
print_env_report,
)

env_result = check_environment(
args.model_id,
total_vram_gb=args.total_vram_gb,
group_size=args.groupsize,
save_dir=save_dir if isinstance(save_dir, str) and save_dir != "auto" else None,
)
print_env_report(env_result, total_vram_gb_override=args.total_vram_gb)
if env_result.risk == "danger":
sys.exit(1)

Runner.auto_run(
model_id=args.model_id,
wbits=args.wbits,
Expand Down
14 changes: 14 additions & 0 deletions onecomp/qep/_quantize_with_qep.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from onecomp.qep._qep_config import QEPConfig
from onecomp.quantizer._quantizer import Quantizer
from onecomp.utils import capture_input_activations
from onecomp.utils.quantization_progress import QuantizationProgressTracker

logger = getLogger(__name__)

Expand All @@ -36,6 +37,8 @@ def run_quantize_with_qep(
quantizer: Quantizer,
qep_config: QEPConfig,
calibration_config: CalibrationConfig,
*,
report_progress: bool = True,
):
"""Run quantization with Quantization Error Propagation (QEP).

Expand All @@ -51,6 +54,7 @@ def run_quantize_with_qep(
qep_config (QEPConfig): Configuration for QEP
(percdamp, perccorr, exclude_layer_keywords).
calibration_config (CalibrationConfig): Calibration parameters.
report_progress (bool): When True, log ``[progress]`` with ETA per layer.

"""
model = model_config.load_model()
Expand Down Expand Up @@ -80,6 +84,14 @@ def run_quantize_with_qep(

logger.info("Quantizing the model using %s", quantizer.name)

progress = None
if report_progress:
progress = QuantizationProgressTracker(
logger,
len(quantizer.module_to_name),
"QEP quantization (general, per layer)",
)

# 2. For each target layer, perform the following sequentially
for module, name in quantizer.module_to_name.items():

Expand Down Expand Up @@ -114,6 +126,8 @@ def run_quantize_with_qep(

# 2-4. Free memory
del quant_input_activation
if progress is not None:
progress.step_complete(name)

del original_input_activations
quantizer.execute_post_processing()
36 changes: 28 additions & 8 deletions onecomp/qep/_quantize_with_qep_arch.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
move_kwargs_to_device,
expand_kwargs_batch,
)
from onecomp.utils.quantization_progress import QuantizationProgressTracker

logger = getLogger(__name__)

Expand Down Expand Up @@ -143,6 +144,7 @@ def compute_hessian_and_crossterm(
def make_hook(name):
def hook(module, inp, out):
dest[name] = inp[0] if isinstance(inp, tuple) else inp

return hook

handlers = [
Expand Down Expand Up @@ -213,6 +215,7 @@ def _compute_per_module_hessians(
def _make_hook(key):
def hook(_, inp, __):
dest[key] = inp[0] if isinstance(inp, tuple) else inp

return hook

handlers = [m.register_forward_hook(_make_hook(i)) for i, m in enumerate(modules)]
Expand Down Expand Up @@ -251,10 +254,7 @@ def hook(_, inp, __):
for h in handlers:
h.remove()

return {
modules[i]: (hessians[i] if nsamples[i] > 0 else None)
for i in range(len(modules))
}
return {modules[i]: (hessians[i] if nsamples[i] > 0 else None) for i in range(len(modules))}


@torch.no_grad()
Expand All @@ -263,6 +263,8 @@ def run_quantize_with_qep_arch(
quantizer: Quantizer,
qep_config: QEPConfig,
calibration_config: CalibrationConfig,
*,
report_progress: bool = True,
):
"""Run architecture-aware quantization with QEP.

Expand All @@ -279,6 +281,7 @@ def run_quantize_with_qep_arch(
qep_config (QEPConfig): Configuration for QEP
(percdamp, perccorr, exclude_layer_keywords).
calibration_config (CalibrationConfig): Calibration parameters.
report_progress (bool): When True, log ``[progress]`` with ETA per target layer.

"""

Expand Down Expand Up @@ -318,6 +321,14 @@ def run_quantize_with_qep_arch(
name for module, name in quantizer.module_to_name.items() if module in block_modules
}

progress = None
if report_progress:
progress = QuantizationProgressTracker(
logger,
len(remaining_targets),
"QEP quantization (architecture-aware)",
)

# 2. For each target transformer block, perform the following sequentially
for block_idx, block in enumerate(blocks):

Expand Down Expand Up @@ -365,9 +376,7 @@ def run_quantize_with_qep_arch(
targets = [m for m in group_q if m in quantizer.module_to_name]
if not targets:
continue
is_expert = any(
".experts." in quantizer.module_to_name[m] for m in targets
)
is_expert = any(".experts." in quantizer.module_to_name[m] for m in targets)
if is_expert:
expert_modules_q.extend(targets)
else:
Expand Down Expand Up @@ -442,6 +451,8 @@ def run_quantize_with_qep_arch(
name,
)
remaining_targets.discard(name)
if progress is not None:
progress.step_complete(name)

# 4. Process MoE expert layers with per-module Hessians (no cross-term)
if expert_modules_q:
Expand All @@ -451,7 +462,12 @@ def run_quantize_with_qep_arch(
len(expert_modules_q),
)
expert_hessians = _compute_per_module_hessians(
block_q, expert_modules_q, inps_q, kwargs, batch_size, device,
block_q,
expert_modules_q,
inps_q,
kwargs,
batch_size,
device,
)
for module_q in expert_modules_q:
name = quantizer.module_to_name[module_q]
Expand All @@ -462,6 +478,8 @@ def run_quantize_with_qep_arch(
name,
)
remaining_targets.discard(name)
if progress is not None:
progress.step_complete(f"{name} (skipped, no tokens)")
continue

logger.info(
Expand Down Expand Up @@ -489,6 +507,8 @@ def run_quantize_with_qep_arch(
name,
)
remaining_targets.discard(name)
if progress is not None:
progress.step_complete(name)

# forward input to the next block
inps_q = forward_input(inps_q, block_q, kwargs, batch_size, device)
Expand Down
1 change: 1 addition & 0 deletions onecomp/quantizer/_quantizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ class Quantizer(metaclass=ABCMeta):
flag_calibration: bool = False
flag_hessian: bool = False
flag_xtx: bool = False # Whether X^T X is needed (e.g., JointQ)
flag_qep_supported: bool = True

def __post_init__(self):
"""__post_init__ method"""
Expand Down
5 changes: 5 additions & 0 deletions onecomp/quantizer/autobit/_autobit.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,11 @@ def _sync_flags(self):
self.flag_calibration = any(q.flag_calibration for q in self.quantizers)
self.flag_hessian = any(q.flag_hessian for q in self.quantizers)
self.flag_xtx = any(q.flag_xtx for q in self.quantizers)
# AutoBit supports QEP only when *all* candidate quantizers support it
# (the per-layer assignment may dispatch to any child quantizer).
self.flag_qep_supported = all(
q.flag_qep_supported for q in self.quantizers
)

def _validate_manual_fused_consistency(self):
"""Check that manual keyword rules don't split fused groups."""
Expand Down
3 changes: 3 additions & 0 deletions onecomp/quantizer/jointq/_jointq.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,9 @@ class JointQ(Quantizer):
flag_calibration: bool = True
flag_hessian: bool = False
flag_xtx: bool = True
# JointQ does not yet support the generic QEP pipeline.
# Planned for a future release.
flag_qep_supported: bool = False
hessian_dtype: torch.dtype = torch.float64

# Parameters for the JointQ quantizer
Expand Down
Loading