11# (C) 2026 GoodData Corporation
22from __future__ import annotations
33
4+ import logging
45from typing import Callable
56
67import orjson
78import pandas
9+ from gooddata_sdk .type_converter import AttributeConverterStore
810
911from gooddata_pandas .arrow_types import TypesMapper
1012
4244
4345_REQUIRED_SCHEMA_KEYS = (_META_XTAB , _META_MODEL , _META_VIEW )
4446
47+ logger = logging .getLogger (__name__ )
48+
49+
def read_model_labels(table: pa.Table) -> dict:
    """Return the ``labels`` dict from the Arrow table's ``x-gdc-model-v1`` schema metadata.

    An empty dict is returned when the table has no schema metadata, when the
    ``x-gdc-model-v1`` key is absent, or when the decoded model carries no usable
    ``labels`` mapping (missing, ``null`` or not a JSON object), so callers can use
    the result unconditionally without extra None-checks.

    Args:
        table: Arrow table whose ``schema.metadata`` is read; the value stored under
            the ``b"x-gdc-model-v1"`` key is parsed as JSON.

    Returns:
        The ``labels`` mapping from the model metadata, or ``{}``.

    Raises:
        Whatever ``orjson.loads`` raises when the stored payload is not valid JSON.
    """
    metadata = table.schema.metadata
    if not metadata or b"x-gdc-model-v1" not in metadata:
        return {}
    model = orjson.loads(metadata[b"x-gdc-model-v1"])
    if not isinstance(model, dict):
        # A non-object payload cannot carry labels; honour the "always a dict" contract.
        return {}
    labels = model.get("labels")
    # ``"labels": null`` (or any non-mapping value) must not leak to callers that
    # immediately call ``.get`` on the result.
    return labels if isinstance(labels, dict) else {}
59+
60+
def _get_date_converter_for_label(label_id: str, model_labels: dict):
    """Look up the type converter matching a label's date granularity.

    The label's entry in *model_labels* (the ``labels`` dict taken from the
    ``x-gdc-model-v1`` Arrow schema metadata) is inspected for a ``granularity``
    field. When one is present, its upper-cased value is used to query
    ``AttributeConverterStore`` for the ``"DATE"`` type; otherwise ``None`` is
    returned and the caller is expected to skip conversion.

    Expected mappings (as registered in ``AttributeConverterStore``; not
    verifiable from this function alone):

    - ``DAY`` / ``MONTH`` / ``YEAR`` → ``DateConverter`` (→ ``pandas.Timestamp`` via external fn)
    - ``WEEK`` / ``QUARTER``         → ``StringConverter`` (no-op)
    - ``MINUTE`` / ``HOUR``          → ``DatetimeConverter``
    - No granularity (text attrs)    → ``None``
    """
    label_granularity = model_labels.get(label_id, {}).get("granularity")
    if label_granularity:
        return AttributeConverterStore.find_converter("DATE", label_granularity.upper())
    # Unknown label or plain text attribute: there is nothing to convert.
    return None
77+
78+
def convert_label_values(label_id: str, values: list, model_labels: dict) -> list:
    """Convert raw Arrow attribute values according to the label's date granularity.

    The converter is chosen by ``_get_date_converter_for_label``. This is meant to
    mirror the non-Arrow execution path (``AttributeConverterStore`` in
    ``_typed_attribute_value``):

    - ``DAY`` / ``MONTH`` / ``YEAR`` granularity → ``pandas.Timestamp``
    - ``WEEK`` / ``QUARTER``                     → ``str`` (unchanged)
    - No granularity (text attributes)           → *values* handed back as the **same object**

    ``None`` entries are never passed to the converter; they stay ``None``.

    Args:
        label_id: Arrow column name / GoodData label local ID.
        values: Raw values from ``table.column(label_id).to_pylist()``.
        model_labels: The ``labels`` dict from ``x-gdc-model-v1`` schema metadata
            (as returned by :func:`read_model_labels`).

    Returns:
        A new list of converted values, or the original *values* object when the
        label has no converter.
    """
    label_converter = _get_date_converter_for_label(label_id, model_labels)
    if label_converter is not None:
        return [None if raw is None else label_converter.to_external_type(raw) for raw in values]
    # No granularity for this label: return the input list itself, not a copy.
    return values
103+
45104
46105def build_metric_field_index (table : pa .Table ) -> dict [int , str ]:
47106 """Return {metric_dimension_index: arrow_field_name} from the table schema.
@@ -77,9 +136,14 @@ def _parse_schema_metadata(table: pa.Table) -> dict:
77136 raise ValueError (
78137 "Arrow table has no schema metadata. Expected GoodData metadata keys: " + ", " .join (_REQUIRED_SCHEMA_KEYS )
79138 )
80- schema_meta = {
81- k .decode (): orjson .loads (v ) for k , v in table .schema .metadata .items () if k .decode () in _REQUIRED_SCHEMA_KEYS
82- }
139+ schema_meta = {}
140+ for _k , _v in table .schema .metadata .items ():
141+ try :
142+ _k_str = _k .decode ()
143+ except UnicodeDecodeError :
144+ continue
145+ if _k_str in _REQUIRED_SCHEMA_KEYS :
146+ schema_meta [_k_str ] = orjson .loads (_v )
83147 missing = [k for k in _REQUIRED_SCHEMA_KEYS if k not in schema_meta ]
84148 if missing :
85149 raise ValueError (
@@ -186,10 +250,15 @@ def _build_inline_index(
186250 totals_meta = xtab_meta .get ("totalsMetadata" , {})
187251 total_ref_vals : list = [None ] * table .num_rows
188252 if totals_meta :
189- for field in table .schema :
190- if field .name .startswith (_COL_TOTAL_REF_PREFIX ):
191- total_ref_vals = table .column (field .name ).to_pylist ()
192- break
253+ total_ref_cols = [f .name for f in table .schema if f .name .startswith (_COL_TOTAL_REF_PREFIX )]
254+ if total_ref_cols :
255+ if len (total_ref_cols ) > 1 :
256+ logger .warning (
257+ "Arrow table has %d __total_ref* columns; only %r is used for aggregation names." ,
258+ len (total_ref_cols ),
259+ total_ref_cols [0 ],
260+ )
261+ total_ref_vals = table .column (total_ref_cols [0 ]).to_pylist ()
193262
194263 # Precompute per-row aggregation name and kept-label set for total rows.
195264 agg_for_row : list [str | None ] = [None ] * table .num_rows
@@ -212,16 +281,17 @@ def _build_inline_index(
212281 values = table .column (lid ).to_pylist ()
213282 processed = []
214283 for i , v in enumerate (values ):
215- if row_types [i ] != 0 and isinstance (v , str ):
216- if ref in kept_labels_for_row [i ]:
217- # Outer label kept as real attribute value in a subtotal row.
218- processed .append (v )
219- elif v == "" :
220- # Aggregated level left empty by the server — fill with agg name.
221- processed .append (agg_for_row [i ] if agg_for_row [i ] else v )
284+ if row_types [i ] != 0 :
285+ if isinstance (v , str ):
286+ if ref in kept_labels_for_row [i ]:
287+ processed .append (v )
288+ elif v == "" :
289+ processed .append (agg_for_row [i ] if agg_for_row [i ] else v )
290+ else :
291+ processed .append (v .upper ())
222292 else :
223- # Aggregation function marker (e.g. 'sum') — uppercase it .
224- processed .append (v . upper () )
293+ # Non-string value in a total row — replace with the aggregation name when available .
294+ processed .append (agg_for_row [ i ] if agg_for_row [ i ] is not None else v )
225295 else :
226296 processed .append (v )
227297 arrays .append (processed )
@@ -410,6 +480,11 @@ def _label_ids_in_dim(dim: dict) -> set:
410480 (dim for dim in execution_dims if col_ref_label_ids <= _label_ids_in_dim (dim )),
411481 {},
412482 )
483+ if not col_dim and execution_dims :
484+ logger .warning (
485+ "No execution dimension contains column label IDs %s; column_totals_indexes will be empty." ,
486+ col_ref_label_ids ,
487+ )
413488 else :
414489 col_dim = next (
415490 (dim for dim in execution_dims if any ("measureGroupHeaders" in h for h in dim .get ("headers" , []))),
@@ -486,6 +561,11 @@ def _label_ids_in_dim(dim: dict) -> set:
486561 (dim for dim in execution_dims if ref_label_ids <= _label_ids_in_dim (dim )),
487562 {},
488563 )
564+ if not row_dim and execution_dims :
565+ logger .warning (
566+ "No execution dimension contains row label IDs %s; row_totals_indexes will be empty." ,
567+ ref_label_ids ,
568+ )
489569 else :
490570 # Metrics-only: the dimension containing measureGroupHeaders is the output-row dim.
491571 row_dim = next (
0 commit comments