@@ -40,6 +40,12 @@ def convert_to_nullable_types(df):
4040 df [col ] = df [col ].astype ("Float32" )
4141 elif dtype == "bool" :
4242 df [col ] = df [col ].astype ("boolean" )
43+ elif pd .api .types .is_object_dtype (df [col ]):
44+ non_null = df [col ].dropna ()
45+ if len (non_null ) and non_null .map (lambda x : isinstance (x , str )).all ():
46+ df [col ] = df [col ].astype ("string" )
47+ elif pd .api .types .is_string_dtype (df [col ]):
48+ df [col ] = df [col ].astype ("string" )
4349 return df
4450
4551
@@ -65,6 +71,7 @@ def test_dataframe_to_tsfile_basic():
6571 df_sorted = convert_to_nullable_types (
6672 df .sort_values ("time" ).reset_index (drop = True )
6773 )
74+ df_read = convert_to_nullable_types (df_read )
6875
6976 assert df_read .shape == (100 , 4 )
7077 assert df_read ["time" ].equals (df_sorted ["time" ])
@@ -109,6 +116,7 @@ def test_dataframe_to_tsfile_with_index():
109116
110117 df_read = to_dataframe (tsfile_path , table_name = "test_table" )
111118 df_read = df_read .sort_values ("time" ).reset_index (drop = True )
119+ df_read = convert_to_nullable_types (df_read )
112120 time_expected = pd .Series (df .index .values , dtype = "Int64" )
113121 assert df_read .shape == (30 , 3 )
114122 assert df_read ["time" ].equals (time_expected )
@@ -146,6 +154,7 @@ def test_dataframe_to_tsfile_custom_time_column():
146154 df_sorted = convert_to_nullable_types (
147155 df .sort_values ("timestamp" ).reset_index (drop = True )
148156 )
157+ df_read = convert_to_nullable_types (df_read )
149158
150159 assert df_read .shape == (30 , 3 )
151160 assert df_read ["timestamp" ].equals (df_sorted ["timestamp" ])
@@ -169,6 +178,7 @@ def test_dataframe_to_tsfile_case_insensitive_time():
169178 dataframe_to_tsfile (df , tsfile_path , table_name = "test_table" )
170179
171180 df_read = to_dataframe (tsfile_path , table_name = "test_table" )
181+ df_read = convert_to_nullable_types (df_read )
172182 assert df_read .shape == (20 , 2 )
173183 assert df_read ["time" ].equals (pd .Series ([i for i in range (20 )], dtype = "Int64" ))
174184 finally :
@@ -200,6 +210,7 @@ def test_dataframe_to_tsfile_with_tag_columns():
200210 df_sorted = convert_to_nullable_types (
201211 df .sort_values ("time" ).reset_index (drop = True )
202212 )
213+ df_read = convert_to_nullable_types (df_read )
203214
204215 assert df_read .shape == (20 , 4 )
205216 assert df_read ["device" ].equals (df_sorted ["device" ])
@@ -242,6 +253,7 @@ def test_dataframe_to_tsfile_tag_time_unsorted():
242253 df_read = to_dataframe (tsfile_path , table_name = "test_table" )
243254 df_expected = df .sort_values (by = ["device" , "time" ]).reset_index (drop = True )
244255 df_expected = convert_to_nullable_types (df_expected )
256+ df_read = convert_to_nullable_types (df_read )
245257
246258 assert df_read .shape == (10 , 3 )
247259 assert df_read ["device" ].equals (df_expected ["device" ])
@@ -281,6 +293,7 @@ def test_dataframe_to_tsfile_all_datatypes():
281293 df_sorted = convert_to_nullable_types (
282294 df .sort_values ("time" ).reset_index (drop = True )
283295 )
296+ df_read = convert_to_nullable_types (df_read )
284297
285298 assert df_read .shape == (50 , 11 )
286299 assert df_read ["bool_col" ].equals (df_sorted ["bool_col" ])
0 commit comments