Skip to content

Commit add06e9

Browse files
committed
feat(gooddata-pandas): add max_bytes to arrow path and improve logging
risk: low
1 parent 6fb331c commit add06e9

1 file changed

Lines changed: 17 additions & 5 deletions

File tree

  • packages/gooddata-sdk/src/gooddata_sdk/compute/model

packages/gooddata-sdk/src/gooddata_sdk/compute/model/execution.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# (C) 2022 GoodData Corporation
22
from __future__ import annotations
33

4+
import io
45
import logging
56
from typing import TYPE_CHECKING, Any, Union
67

@@ -382,12 +383,16 @@ def read_result(
382383
)
383384
return ExecutionResult(execution_result)
384385

385-
def read_result_arrow(self) -> pyarrow.Table:
386+
def read_result_arrow(self, max_bytes: int | None = None) -> pyarrow.Table:
386387
"""
387388
Reads the full execution result as a pyarrow Table.
388389
389390
The binary endpoint returns the complete result in one shot (no paging).
390391
Requires pyarrow to be installed (pip install gooddata-sdk[arrow]).
392+
393+
Args:
394+
max_bytes: Optional byte-size limit. Raises ResultSizeBytesLimitExceeded when
395+
the response body exceeds this value.
391396
"""
392397
if _ipc is None:
393398
raise ImportError(
@@ -408,12 +413,19 @@ def read_result_arrow(self) -> pyarrow.Table:
408413
_return_http_data_only=True,
409414
)
410415
try:
411-
# Stream directly: Arrow reads chunk-by-chunk, never allocating the full
412-
# payload as a Python bytes object (~1× payload peak instead of ~2×).
416+
if max_bytes is not None:
417+
# Buffer the whole body first so its size can be checked before parsing; this gives up the streaming path's lower peak memory.
418+
data = response.read()
419+
if len(data) > max_bytes:
420+
raise ResultSizeBytesLimitExceeded(
421+
result_size_bytes_limit=max_bytes,
422+
actual_result_bytes_size=len(data),
423+
)
424+
return _ipc.open_stream(io.BytesIO(data)).read_all()
413425
return _ipc.open_stream(response).read_all()
414426
finally:
415-
# Drain the HTTP chunked-encoding terminator (0\r\n\r\n) that Arrow
416-
# leaves unread after the IPC EOS marker, so the connection is fully
427+
# Drain the HTTP chunked-encoding terminator (0\r\n\r\n) that pyarrow might
428+
# leave unread after the IPC EOS marker, so the connection is fully
417429
# consumed before being returned to the urllib3 pool.
418430
response.read()
419431
response.release_conn()

0 commit comments

Comments
 (0)