Skip to content

Commit 408e36d

Browse files
committed
Merge branch 'main' into qa
2 parents 5babe7d + ffa087a commit 408e36d

File tree

4 files changed

+74
-53
lines changed

4 files changed

+74
-53
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,7 @@
11
# Changelog
2+
## v1.7.0 6/13/25
3+
- Use fastavro for avro encoding/decoding
4+
25
## v1.6.5 3/24/25
36
- Add capability to return PostgreSQL cursor description
47

pyproject.toml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
44

55
[project]
66
name = "nypl_py_utils"
7-
version = "1.6.5"
7+
version = "1.7.0"
88
authors = [
99
{ name="Aaron Friedman", email="aaronfriedman@nypl.org" },
1010
]
@@ -24,7 +24,7 @@ dependencies = []
2424

2525
[project.optional-dependencies]
2626
avro-client = [
27-
"avro>=1.11.1",
27+
"fastavro>=1.11.1",
2828
"requests>=2.28.1"
2929
]
3030
cloudlibrary-client = [
@@ -82,7 +82,7 @@ development = [
8282
"flake8>=6.0.0",
8383
"freezegun>=1.2.2",
8484
"mock>=4.0.3",
85-
"pytest>=7.2.0",
85+
"pytest==8.0",
8686
"pytest-mock>=3.10.0",
8787
"requests-mock>=1.10.0"
8888
]

src/nypl_py_utils/classes/avro_client.py

Lines changed: 29 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,7 @@
1-
import avro.schema
1+
import json
22
import requests
33

4-
from avro.errors import AvroException
5-
from avro.io import BinaryDecoder, BinaryEncoder, DatumReader, DatumWriter
4+
from fastavro import schemaless_writer, schemaless_reader, parse_schema
65
from io import BytesIO
76
from nypl_py_utils.functions.log_helper import create_log
87
from requests.adapters import HTTPAdapter, Retry
@@ -23,7 +22,7 @@ def __init__(self, platform_schema_url):
2322
self.session = requests.Session()
2423
self.session.mount("https://",
2524
HTTPAdapter(max_retries=retry_policy))
26-
self.schema = avro.schema.parse(
25+
self.schema = parse_schema(
2726
self.get_json_schema(platform_schema_url))
2827

2928
def get_json_schema(self, platform_schema_url):
@@ -52,7 +51,7 @@ def get_json_schema(self, platform_schema_url):
5251

5352
try:
5453
json_response = response.json()
55-
return json_response["data"]["schema"]
54+
return json.loads(json_response["data"]["schema"])
5655
except (JSONDecodeError, KeyError) as e:
5756
self.logger.error(
5857
"Retrieved schema is malformed: {errorType} {errorMessage}"
@@ -70,26 +69,28 @@ class AvroEncoder(AvroClient):
7069
Platform API endpoint from which to fetch the schema in JSON format.
7170
"""
7271

73-
def encode_record(self, record):
72+
def encode_record(self, record, silent=False):
7473
"""
7574
Encodes a single JSON record using the given Avro schema.
7675
7776
Returns the encoded record as a byte string.
7877
"""
79-
self.logger.debug(
80-
"Encoding record using {schema} schema".format(
81-
schema=self.schema.name)
82-
)
83-
datum_writer = DatumWriter(self.schema)
78+
if not silent:
79+
self.logger.info(
80+
"Encoding record using {schema} schema".format(
81+
schema=self.schema['name']
82+
)
83+
)
8484
with BytesIO() as output_stream:
85-
encoder = BinaryEncoder(output_stream)
8685
try:
87-
datum_writer.write(record, encoder)
86+
schemaless_writer(output_stream, self.schema, record,
87+
strict_allow_default=True)
8888
return output_stream.getvalue()
89-
except AvroException as e:
89+
except Exception as e:
9090
self.logger.error("Failed to encode record: {}".format(e))
9191
raise AvroClientError(
92-
"Failed to encode record: {}".format(e)) from None
92+
"Failed to encode record: {}".format(e)
93+
) from None
9394

9495
def encode_batch(self, record_list):
9596
"""
@@ -99,25 +100,11 @@ def encode_batch(self, record_list):
99100
"""
100101
self.logger.info(
101102
"Encoding ({num_rec}) records using {schema} schema".format(
102-
num_rec=len(record_list), schema=self.schema.name
103+
num_rec=len(record_list), schema=self.schema['name']
103104
)
104105
)
105-
encoded_records = []
106-
datum_writer = DatumWriter(self.schema)
107-
with BytesIO() as output_stream:
108-
encoder = BinaryEncoder(output_stream)
109-
for record in record_list:
110-
try:
111-
datum_writer.write(record, encoder)
112-
encoded_records.append(output_stream.getvalue())
113-
output_stream.seek(0)
114-
output_stream.truncate(0)
115-
except AvroException as e:
116-
self.logger.error("Failed to encode record: {}".format(e))
117-
raise AvroClientError(
118-
"Failed to encode record: {}".format(e)
119-
) from None
120-
return encoded_records
106+
return [self.encode_record(record, silent=True)
107+
for record in record_list]
121108

122109

123110
class AvroDecoder(AvroClient):
@@ -126,23 +113,22 @@ class AvroDecoder(AvroClient):
126113
Platform API endpoint from which to fetch the schema in JSON format.
127114
"""
128115

129-
def decode_record(self, record):
116+
def decode_record(self, record, silent=False):
130117
"""
131118
Decodes a single record represented using the given Avro
132119
schema. Input must be a bytes-like object.
133120
134121
Returns a dictionary where each key is a field in the schema.
135122
"""
136-
self.logger.debug(
137-
"Decoding {rec} using {schema} schema".format(
138-
rec=record, schema=self.schema.name
123+
if not silent:
124+
self.logger.info(
125+
"Decoding record using {schema} schema".format(
126+
schema=self.schema['name']
127+
)
139128
)
140-
)
141-
datum_reader = DatumReader(self.schema)
142129
with BytesIO(record) as input_stream:
143-
decoder = BinaryDecoder(input_stream)
144130
try:
145-
return datum_reader.read(decoder)
131+
return schemaless_reader(input_stream, self.schema)
146132
except Exception as e:
147133
self.logger.error("Failed to decode record: {}".format(e))
148134
raise AvroClientError(
@@ -157,14 +143,11 @@ def decode_batch(self, record_list):
157143
"""
158144
self.logger.info(
159145
"Decoding ({num_rec}) records using {schema} schema".format(
160-
num_rec=len(record_list), schema=self.schema.name
146+
num_rec=len(record_list), schema=self.schema['name']
161147
)
162148
)
163-
decoded_records = []
164-
for record in record_list:
165-
decoded_record = self.decode_record(record)
166-
decoded_records.append(decoded_record)
167-
return decoded_records
149+
return [self.decode_record(record, silent=True)
150+
for record in record_list]
168151

169152

170153
class AvroClientError(Exception):

tests/test_avro_client.py

Lines changed: 39 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,44 @@
2020
]
2121
})}}
2222

23+
FASTAVRO_SCHEMA = {
24+
"type": "record",
25+
"name": "TestSchema",
26+
"fields": [
27+
{
28+
"name": "patron_id",
29+
"type": "int"
30+
},
31+
{
32+
"name": "library_branch",
33+
"type": [
34+
"null",
35+
"string"
36+
]
37+
}
38+
],
39+
"__fastavro_parsed": True,
40+
"__named_schemas": {
41+
"TestSchema": {
42+
"type": "record",
43+
"name": "TestSchema",
44+
"fields": [
45+
{
46+
"name": "patron_id",
47+
"type": "int"
48+
},
49+
{
50+
"name": "library_branch",
51+
"type": [
52+
"null",
53+
"string"
54+
]
55+
}
56+
]
57+
}
58+
}
59+
}
60+
2361

2462
class TestAvroClient:
2563
@pytest.fixture
@@ -36,10 +74,7 @@ def test_avro_decoder_instance(self, requests_mock):
3674

3775
def test_get_json_schema_success(self, test_avro_encoder_instance,
3876
test_avro_decoder_instance):
39-
assert test_avro_encoder_instance.schema == _TEST_SCHEMA["data"][
40-
"schema"]
41-
assert test_avro_decoder_instance.schema == _TEST_SCHEMA["data"][
42-
"schema"]
77+
assert test_avro_encoder_instance.schema == FASTAVRO_SCHEMA
4378

4479
def test_get_json_schema_error(self, requests_mock):
4580
requests_mock.get("https://test_schema_url", exc=ConnectTimeout)

0 commit comments

Comments (0)