Skip to content

Commit 3b4adc4

Browse files
committed
[GR-74749] Treat foreign host buffers as Python buffers.
PullRequest: graalpython/4374
2 parents 2d73936 + 7478d7e commit 3b4adc4

File tree

5 files changed

+435
-2
lines changed

5 files changed

+435
-2
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ This changelog summarizes major changes between GraalVM versions of the Python
44
language runtime. The main focus is on user-observable behavior of the engine.
55

66
## Version 25.1.0
7+
* Treat foreign buffer objects as Python buffer-compatible binary objects, so APIs like `memoryview`, `bytes`, `bytearray`, `binascii.hexlify`, and `io.BytesIO` work naturally on them when embedding GraalPy in Java. This allows passing binary data between Python and Java's `ByteBuffer` and `ByteSequence` types with minimal (sometimes zero) copies.
78
* Add support for [Truffle source options](https://www.graalvm.org/truffle/javadoc/com/oracle/truffle/api/source/Source.SourceBuilder.html#option(java.lang.String,java.lang.String)):
89
* The `python.Optimize` option can be used to specify the optimization level, like the `-O` (level 1) and `-OO` (level 2) commandline options.
910
* The `python.NewGlobals` option can be used to run a source with a fresh globals dictionary instead of the main module globals, which is useful for embeddings that want isolated top-level execution.

docs/user/Interoperability.md

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,60 @@ assert l == [6]
118118

119119
See the [Interop Types to Python](#interop-types-to-python) section for more interop traits and how they map to Python types.
120120

121+
## Passing Binary Data Between Java and Python
122+
123+
Passing binary data between Java and Python deserves attention:
124+
125+
- Java code typically uses `byte[]` or `java.nio.ByteBuffer`
126+
- Python code typically uses `bytes`, `bytearray`, `memoryview`, or file-like APIs such as `io.BytesIO`
127+
128+
### Java to Python
129+
130+
Raw Java `byte[]` arrays are accessible as `list`-like objects in Python.
131+
Only integral values that fit into a signed `byte` can be read from or written to such objects.
132+
Python, on the other hand, usually exposes binary data as unsigned byte values.
133+
To achieve the equivalent of a "re-interpreting cast", Java byte arrays should be passed to Python using `ByteBuffer.wrap(byte[])`:
134+
135+
```java
136+
import java.nio.ByteBuffer;
137+
byte[] data = ...;
138+
ByteBuffer buffer = ByteBuffer.wrap(data); // does not copy
139+
context.getBindings("python").putMember("java_buffer", buffer);
140+
```
141+
142+
Python can then use the object through buffer-oriented binary data APIs:
143+
144+
```python
145+
memoryview(java_buffer) # does not copy
146+
bytes(java_buffer) # copies into an immutable Python-owned buffer
147+
bytearray(java_buffer) # copies into a mutable Python-owned buffer
148+
io.BytesIO(java_buffer) # copies into BytesIO's internal storage
149+
```
150+
151+
### Python to Java
152+
153+
Python `bytes` and other bytes-like objects can be interpreted like any `java.util.List`.
154+
Because Python bytes are usually unsigned, however, they cannot simply be converted via `Value#as(byte[].class)` if any values are larger than 127.
155+
The GraalVM Polyglot SDK provides `org.graalvm.polyglot.io.ByteSequence` as a target type to deal with this issue explicitly.
156+
157+
```java
158+
import org.graalvm.polyglot.Value;
159+
import org.graalvm.polyglot.io.ByteSequence;
160+
Value result = context.eval("python", "b'hello'");
161+
ByteSequence seq = result.as(ByteSequence.class); // does not copy
162+
```
163+
164+
`ByteSequence` keeps the data as a Python-owned byte sequence without immediately copying.
165+
It provides a `toByteArray()` method that deals with re-interpreting unsigned Python bytes as signed Java bytes.
166+
167+
```java
168+
import java.nio.charset.StandardCharsets;
169+
import org.graalvm.polyglot.io.ByteSequence;
170+
ByteSequence seq = result.as(ByteSequence.class);
171+
byte[] bytes = seq.toByteArray(); // copies into Java byte[]
172+
String s = new String(bytes, StandardCharsets.UTF_8);
173+
```
174+
121175
## Call Other Languages from Python
122176

123177
The _polyglot_ API allows non-JVM specific interactions with other languages from Python scripts.

graalpython/com.oracle.graal.python.test/src/com/oracle/graal/python/test/interop/HostInteropTest.java

Lines changed: 78 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2023, 2026, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* The Universal Permissive License (UPL), Version 1.0
@@ -46,13 +46,15 @@
4646
import static org.junit.Assert.assertTrue;
4747

4848
import java.math.BigInteger;
49+
import java.nio.ByteBuffer;
4950
import java.nio.ByteOrder;
5051
import java.time.LocalDate;
5152
import java.time.LocalTime;
5253
import java.time.ZoneId;
5354

5455
import org.graalvm.polyglot.Context;
5556
import org.graalvm.polyglot.Value;
57+
import org.graalvm.polyglot.io.ByteSequence;
5658
import org.junit.After;
5759
import org.junit.Before;
5860
import org.junit.Test;
@@ -713,4 +715,79 @@ public void testByteBuffer() {
713715
t.writeBufferDouble(ByteOrder.LITTLE_ENDIAN, 0, 12345.6789123);
714716
assertEquals(12345.6789123, t.readBufferDouble(ByteOrder.LITTLE_ENDIAN, 0), 0.0);
715717
}
718+
719+
@Test
720+
public void testHostByteBufferAsPythonBuffer() {
721+
byte[] writable = new byte[]{1, 2, 3, 4};
722+
context.getBindings("python").putMember("writable_bb", ByteBuffer.wrap(writable));
723+
context.getBindings("python").putMember("readonly_bb", ByteBuffer.wrap(new byte[]{-1, 5, 6, 7, 8}).asReadOnlyBuffer());
724+
725+
context.eval("python", """
726+
import binascii
727+
import io
728+
729+
mv = memoryview(writable_bb)
730+
assert not mv.readonly
731+
assert mv.tobytes() == b"\\x01\\x02\\x03\\x04"
732+
assert bytes(writable_bb) == b"\\x01\\x02\\x03\\x04"
733+
assert bytearray(writable_bb) == bytearray(b"\\x01\\x02\\x03\\x04")
734+
assert binascii.hexlify(writable_bb) == b"01020304"
735+
bio = io.BytesIO()
736+
assert bio.write(writable_bb) == 4
737+
assert bio.getvalue() == b"\\x01\\x02\\x03\\x04"
738+
mv[1] = 9
739+
assert io.BytesIO(b"abcd").readinto(writable_bb) == 4
740+
assert bytes(writable_bb) == b"abcd"
741+
742+
ro = memoryview(readonly_bb)
743+
assert ro.readonly
744+
assert ro.tobytes() == b"\\xff\\x05\\x06\\x07\\x08"
745+
assert bytes(readonly_bb) == b"\\xff\\x05\\x06\\x07\\x08"
746+
assert bytearray(readonly_bb) == bytearray(b"\\xff\\x05\\x06\\x07\\x08")
747+
assert io.BytesIO().write(readonly_bb) == 5
748+
try:
749+
ro[0] = 1
750+
raise AssertionError("expected memoryview write to fail")
751+
except TypeError:
752+
pass
753+
try:
754+
io.BytesIO(b"wxyz").readinto(readonly_bb)
755+
raise AssertionError("expected readinto to fail")
756+
except TypeError:
757+
pass
758+
""");
759+
760+
assertArrayEquals(new byte[]{'a', 'b', 'c', 'd'}, writable);
761+
}
762+
763+
/**
 * Checks that a host {@code ByteSequence} bound into the Python bindings can be used through
 * Python's buffer-oriented APIs.
 *
 * <p>The sequence is exposed as the Python global {@code seq}. The script asserts that a
 * {@code memoryview} over it is read-only, that {@code memoryview.tobytes}, {@code bytes},
 * {@code bytearray}, {@code binascii.hexlify} and {@code io.BytesIO.write} all see the four
 * bytes {@code 10, 20, 30, 40}, and that both an item assignment through the memoryview and
 * {@code io.BytesIO.readinto} raise {@code TypeError}.
 */
@Test
764+
public void testHostByteSequenceAsPythonBuffer() {
765+
byte[] bytes = new byte[]{10, 20, 30, 40};
766+
// Publish the sequence under the name "seq", which the Python script below refers to.
context.getBindings("python").putMember("seq", ByteSequence.create(bytes));
767+
768+
context.eval("python", """
769+
import binascii
770+
import io
771+
772+
mv = memoryview(seq)
773+
assert mv.readonly
774+
assert mv.tobytes() == b"\\x0a\\x14\\x1e\\x28"
775+
assert bytes(seq) == b"\\x0a\\x14\\x1e\\x28"
776+
assert bytearray(seq) == bytearray(b"\\x0a\\x14\\x1e\\x28")
777+
assert binascii.hexlify(seq) == b"0a141e28"
778+
bio = io.BytesIO()
779+
assert bio.write(seq) == 4
780+
assert bio.getvalue() == b"\\x0a\\x14\\x1e\\x28"
781+
try:
782+
mv[0] = 1
783+
raise AssertionError("expected memoryview write to fail")
784+
except TypeError:
785+
pass
786+
try:
787+
io.BytesIO(b"abcd").readinto(seq)
788+
raise AssertionError("expected readinto to fail")
789+
except TypeError:
790+
pass
791+
""");
792+
}
716793
}

0 commit comments

Comments
 (0)