Skip to content

Commit 865d75f

Browse files
authored
[Python] Fix Linux Py3.14 wheel CI and dataframe edge cases (#789)
* fix py3.9 black * ci wheel python 3.14 * ci fix * ci fix * fix ci * ci fix * ci fix
1 parent 5e8a38a commit 865d75f

6 files changed

Lines changed: 56 additions & 11 deletions

File tree

.github/workflows/wheels.yml

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ jobs:
6363
- name: Install build tools
6464
run: |
6565
python -m pip install -U pip wheel
66-
python -m pip install cibuildwheel==2.21.3
66+
python -m pip install cibuildwheel==3.4.1
6767
6868
- name: Pre-download virtualenv for cibuildwheel
6969
run: |
@@ -108,8 +108,8 @@ jobs:
108108
CIBW_BUILD: "cp39-* cp310-* cp311-* cp312-* cp313-* cp314-*"
109109
CIBW_SKIP: "pp* *-musllinux*"
110110

111-
CIBW_MANYLINUX_X86_64_IMAGE: "manylinux2014"
112-
CIBW_MANYLINUX_AARCH64_IMAGE: "manylinux2014"
111+
CIBW_MANYLINUX_X86_64_IMAGE: "manylinux_2_28"
112+
CIBW_MANYLINUX_AARCH64_IMAGE: "manylinux_2_28"
113113

114114
MACOSX_DEPLOYMENT_TARGET: "12.0"
115115

@@ -222,7 +222,7 @@ jobs:
222222
strategy:
223223
fail-fast: false
224224
matrix:
225-
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
225+
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"]
226226
steps:
227227
- name: Checkout
228228
uses: actions/checkout@v4
@@ -268,8 +268,13 @@ jobs:
268268
# Build wheel via Maven (no clean — keep C++ artifacts from previous job)
269269
chmod +x mvnw || true
270270
cd python
271+
BLACK_VER_PROP=""
272+
case "${{ matrix.python-version }}" in
273+
3.9) BLACK_VER_PROP="-Dblack.version=25.11.0" ;;
274+
esac
271275
../mvnw package -DskipTests \
272-
-Dspotless.check.skip=true -Dspotless.apply.skip=true
276+
-Dspotless.check.skip=true -Dspotless.apply.skip=true \
277+
${BLACK_VER_PROP}
273278
ls -la dist/
274279
275280
- name: Verify wheel

python/pyproject.toml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,11 @@ maintainers = [
3737
]
3838
dependencies = [
3939
"numpy>=2.0.0,<3",
40-
"pandas>=2.0",
40+
"pandas>=2.0,<2.3; python_full_version < '3.14.0'",
41+
"pandas>=2.3.3; python_full_version >= '3.14.0'",
4142
"pyarrow>=16.0,<18; python_version<'3.10'",
42-
"pyarrow>=18.0,<20; python_version>='3.10'"
43+
"pyarrow>=18.0,<20; python_version>='3.10' and python_version<'3.14'",
44+
"pyarrow>=22.0,<24; python_version>='3.14'"
4345
]
4446

4547
[project.urls]

python/requirements.txt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,11 @@
1818
#
1919

2020
cython==3.0.10
21-
black==26.3.1
21+
black==25.11.0; python_version < "3.10"
22+
black==26.3.1; python_version >= "3.10"
2223
numpy>=2.0.0,<3
23-
pandas==2.2.2
24+
pandas==2.2.2; python_full_version < "3.14.0"
25+
pandas>=2.3.3; python_full_version >= "3.14.0"
2426
setuptools==78.1.1
2527
wheel==0.46.2
2628
pyarrow>=8.0.0

python/tests/test_dataframe.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,13 @@ def convert_to_nullable_types(df):
5151
df[col] = df[col].astype("Float32")
5252
elif dtype == "bool":
5353
df[col] = df[col].astype("boolean")
54+
elif pd.api.types.is_object_dtype(df[col]):
55+
non_null = df[col].dropna()
56+
if len(non_null) and non_null.map(lambda x: isinstance(x, str)).all():
57+
df[col] = df[col].astype("string")
58+
elif pd.api.types.is_string_dtype(df[col]):
59+
# Columns may come back with a NumPy/pandas str dtype (e.g. on Py3.14) rather than object; cast to nullable "string" so Series.equals compares like dtypes
60+
df[col] = df[col].astype("string")
5461
return df
5562

5663

@@ -92,6 +99,7 @@ def test_write_dataframe_basic():
9299
df_sorted = convert_to_nullable_types(
93100
df.sort_values("time").reset_index(drop=True)
94101
)
102+
df_read = convert_to_nullable_types(df_read)
95103
assert df_read.shape == (100, 4)
96104
assert df_read[TIME_COLUMN].equals(df_sorted["time"])
97105
assert df_read["device"].equals(df_sorted["device"])
@@ -128,6 +136,7 @@ def test_write_dataframe_with_index():
128136
df_read = df_read.sort_values(TIME_COLUMN).reset_index(drop=True)
129137
df_sorted = df.sort_index()
130138
df_sorted = convert_to_nullable_types(df_sorted.reset_index(drop=True))
139+
df_read = convert_to_nullable_types(df_read)
131140
time_series = pd.Series(df.sort_index().index.values, dtype="Int64")
132141
assert df_read.shape == (50, 3)
133142
assert df_read[TIME_COLUMN].equals(time_series)
@@ -166,6 +175,7 @@ def test_write_dataframe_case_insensitive():
166175
df_sorted = convert_to_nullable_types(
167176
df.sort_values("Time").reset_index(drop=True)
168177
)
178+
df_read = convert_to_nullable_types(df_read)
169179
assert df_read.shape == (30, 3)
170180
assert df_read[TIME_COLUMN].equals(df_sorted["Time"])
171181
assert df_read["device"].equals(df_sorted["Device"])
@@ -270,6 +280,7 @@ def test_write_dataframe_all_datatypes():
270280
df_sorted = convert_to_nullable_types(
271281
df.sort_values("time").reset_index(drop=True)
272282
)
283+
df_read = convert_to_nullable_types(df_read)
273284
assert df_read.shape == (50, 11)
274285
assert df_read["bool_col"].equals(df_sorted["bool_col"])
275286
assert df_read["int32_col"].equals(df_sorted["int32_col"])
@@ -317,6 +328,7 @@ def test_write_dataframe_schema_time_column():
317328
df_sorted = convert_to_nullable_types(
318329
df.sort_values("time").reset_index(drop=True)
319330
)
331+
df_read = convert_to_nullable_types(df_read)
320332
assert df_read.shape == (50, 3)
321333
assert df_read[TIME_COLUMN].equals(df_sorted[TIME_COLUMN])
322334
assert df_read["device"].equals(df_sorted["device"])
@@ -354,6 +366,7 @@ def test_write_dataframe_schema_time_and_dataframe_time():
354366
df_sorted = convert_to_nullable_types(
355367
df.sort_values("Time").rename(columns=str.lower).reset_index(drop=True)
356368
)
369+
df_read = convert_to_nullable_types(df_read)
357370
assert df_read.shape == (30, 3)
358371
assert df_read["time"].equals(df_sorted["time"])
359372
assert df_read["device"].equals(df_sorted["device"])

python/tests/test_to_tsfile.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,12 @@ def convert_to_nullable_types(df):
4040
df[col] = df[col].astype("Float32")
4141
elif dtype == "bool":
4242
df[col] = df[col].astype("boolean")
43+
elif pd.api.types.is_object_dtype(df[col]):
44+
non_null = df[col].dropna()
45+
if len(non_null) and non_null.map(lambda x: isinstance(x, str)).all():
46+
df[col] = df[col].astype("string")
47+
elif pd.api.types.is_string_dtype(df[col]):
48+
df[col] = df[col].astype("string")
4349
return df
4450

4551

@@ -65,6 +71,7 @@ def test_dataframe_to_tsfile_basic():
6571
df_sorted = convert_to_nullable_types(
6672
df.sort_values("time").reset_index(drop=True)
6773
)
74+
df_read = convert_to_nullable_types(df_read)
6875

6976
assert df_read.shape == (100, 4)
7077
assert df_read["time"].equals(df_sorted["time"])
@@ -109,6 +116,7 @@ def test_dataframe_to_tsfile_with_index():
109116

110117
df_read = to_dataframe(tsfile_path, table_name="test_table")
111118
df_read = df_read.sort_values("time").reset_index(drop=True)
119+
df_read = convert_to_nullable_types(df_read)
112120
time_expected = pd.Series(df.index.values, dtype="Int64")
113121
assert df_read.shape == (30, 3)
114122
assert df_read["time"].equals(time_expected)
@@ -146,6 +154,7 @@ def test_dataframe_to_tsfile_custom_time_column():
146154
df_sorted = convert_to_nullable_types(
147155
df.sort_values("timestamp").reset_index(drop=True)
148156
)
157+
df_read = convert_to_nullable_types(df_read)
149158

150159
assert df_read.shape == (30, 3)
151160
assert df_read["timestamp"].equals(df_sorted["timestamp"])
@@ -169,6 +178,7 @@ def test_dataframe_to_tsfile_case_insensitive_time():
169178
dataframe_to_tsfile(df, tsfile_path, table_name="test_table")
170179

171180
df_read = to_dataframe(tsfile_path, table_name="test_table")
181+
df_read = convert_to_nullable_types(df_read)
172182
assert df_read.shape == (20, 2)
173183
assert df_read["time"].equals(pd.Series([i for i in range(20)], dtype="Int64"))
174184
finally:
@@ -200,6 +210,7 @@ def test_dataframe_to_tsfile_with_tag_columns():
200210
df_sorted = convert_to_nullable_types(
201211
df.sort_values("time").reset_index(drop=True)
202212
)
213+
df_read = convert_to_nullable_types(df_read)
203214

204215
assert df_read.shape == (20, 4)
205216
assert df_read["device"].equals(df_sorted["device"])
@@ -242,6 +253,7 @@ def test_dataframe_to_tsfile_tag_time_unsorted():
242253
df_read = to_dataframe(tsfile_path, table_name="test_table")
243254
df_expected = df.sort_values(by=["device", "time"]).reset_index(drop=True)
244255
df_expected = convert_to_nullable_types(df_expected)
256+
df_read = convert_to_nullable_types(df_read)
245257

246258
assert df_read.shape == (10, 3)
247259
assert df_read["device"].equals(df_expected["device"])
@@ -281,6 +293,7 @@ def test_dataframe_to_tsfile_all_datatypes():
281293
df_sorted = convert_to_nullable_types(
282294
df.sort_values("time").reset_index(drop=True)
283295
)
296+
df_read = convert_to_nullable_types(df_read)
284297

285298
assert df_read.shape == (50, 11)
286299
assert df_read["bool_col"].equals(df_sorted["bool_col"])

python/tsfile/tsfile_table_writer.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,19 @@ def validate_dataframe_for_tsfile(df: pd.DataFrame) -> None:
3434
seen = set()
3535
duplicates = []
3636
for c in columns:
37-
if c is None or (isinstance(c, str) and len(c) == 0):
37+
if isinstance(c, str):
38+
if len(c) == 0:
39+
raise ValueError("Column name cannot be None or empty")
40+
lower = c.lower()
41+
elif c is None:
3842
raise ValueError("Column name cannot be None or empty")
39-
lower = c.lower()
43+
else:
44+
try:
45+
if pd.isna(c):
46+
raise ValueError("Column name cannot be None or empty")
47+
except TypeError:
48+
pass
49+
lower = str(c).lower()
4050
if lower in seen:
4151
duplicates.append(c)
4252
seen.add(lower)

0 commit comments

Comments
 (0)