11# (C) 2026 GoodData Corporation
22from __future__ import annotations
33
4+ import logging
45from typing import Callable
56
67import orjson
4344
4445_REQUIRED_SCHEMA_KEYS = (_META_XTAB , _META_MODEL , _META_VIEW )
4546
47+ logger = logging .getLogger (__name__ )
48+
4649
4750def read_model_labels (table : pa .Table ) -> dict :
4851 """Return the ``labels`` dict from the Arrow table's ``x-gdc-model-v1`` schema metadata.
@@ -133,9 +136,14 @@ def _parse_schema_metadata(table: pa.Table) -> dict:
133136 raise ValueError (
134137 "Arrow table has no schema metadata. Expected GoodData metadata keys: " + ", " .join (_REQUIRED_SCHEMA_KEYS )
135138 )
136- schema_meta = {
137- k .decode (): orjson .loads (v ) for k , v in table .schema .metadata .items () if k .decode () in _REQUIRED_SCHEMA_KEYS
138- }
139+ schema_meta = {}
140+ for _k , _v in table .schema .metadata .items ():
141+ try :
142+ _k_str = _k .decode ()
143+ except UnicodeDecodeError :
144+ continue
145+ if _k_str in _REQUIRED_SCHEMA_KEYS :
146+ schema_meta [_k_str ] = orjson .loads (_v )
139147 missing = [k for k in _REQUIRED_SCHEMA_KEYS if k not in schema_meta ]
140148 if missing :
141149 raise ValueError (
@@ -242,10 +250,15 @@ def _build_inline_index(
242250 totals_meta = xtab_meta .get ("totalsMetadata" , {})
243251 total_ref_vals : list = [None ] * table .num_rows
244252 if totals_meta :
245- for field in table .schema :
246- if field .name .startswith (_COL_TOTAL_REF_PREFIX ):
247- total_ref_vals = table .column (field .name ).to_pylist ()
248- break
253+ total_ref_cols = [f .name for f in table .schema if f .name .startswith (_COL_TOTAL_REF_PREFIX )]
254+ if total_ref_cols :
255+ if len (total_ref_cols ) > 1 :
256+ logger .warning (
257+ "Arrow table has %d __total_ref* columns; only %r is used for aggregation names." ,
258+ len (total_ref_cols ),
259+ total_ref_cols [0 ],
260+ )
261+ total_ref_vals = table .column (total_ref_cols [0 ]).to_pylist ()
249262
250263 # Precompute per-row aggregation name and kept-label set for total rows.
251264 agg_for_row : list [str | None ] = [None ] * table .num_rows
@@ -268,16 +281,17 @@ def _build_inline_index(
268281 values = table .column (lid ).to_pylist ()
269282 processed = []
270283 for i , v in enumerate (values ):
271- if row_types [i ] != 0 and isinstance (v , str ):
272- if ref in kept_labels_for_row [i ]:
273- # Outer label kept as real attribute value in a subtotal row.
274- processed .append (v )
275- elif v == "" :
276- # Aggregated level left empty by the server — fill with agg name.
277- processed .append (agg_for_row [i ] if agg_for_row [i ] else v )
284+ if row_types [i ] != 0 :
285+ if isinstance (v , str ):
286+ if ref in kept_labels_for_row [i ]:
287+ processed .append (v )
288+ elif v == "" :
289+ processed .append (agg_for_row [i ] if agg_for_row [i ] else v )
290+ else :
291+ processed .append (v .upper ())
278292 else :
279- # Aggregation function marker (e.g. 'sum') — uppercase it .
280- processed .append (v . upper () )
293+ # Non-string value in a total row — replace with the aggregation name when available .
294+ processed .append (agg_for_row [ i ] if agg_for_row [ i ] is not None else v )
281295 else :
282296 processed .append (v )
283297 arrays .append (processed )
@@ -466,6 +480,11 @@ def _label_ids_in_dim(dim: dict) -> set:
466480 (dim for dim in execution_dims if col_ref_label_ids <= _label_ids_in_dim (dim )),
467481 {},
468482 )
483+ if not col_dim and execution_dims :
484+ logger .warning (
485+ "No execution dimension contains column label IDs %s; column_totals_indexes will be empty." ,
486+ col_ref_label_ids ,
487+ )
469488 else :
470489 col_dim = next (
471490 (dim for dim in execution_dims if any ("measureGroupHeaders" in h for h in dim .get ("headers" , []))),
@@ -542,6 +561,11 @@ def _label_ids_in_dim(dim: dict) -> set:
542561 (dim for dim in execution_dims if ref_label_ids <= _label_ids_in_dim (dim )),
543562 {},
544563 )
564+ if not row_dim and execution_dims :
565+ logger .warning (
566+ "No execution dimension contains row label IDs %s; row_totals_indexes will be empty." ,
567+ ref_label_ids ,
568+ )
545569 else :
546570 # Metrics-only: the dimension containing measureGroupHeaders is the output-row dim.
547571 row_dim = next (
0 commit comments