Skip to content
Merged
29 changes: 29 additions & 0 deletions dataframely/columns/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,35 @@ def pyarrow_field(self, name: str) -> pa.Field:
def pyarrow_dtype(self) -> pa.DataType:
"""The :mod:`pyarrow` dtype equivalent of this column data type."""

# ----------------------------------- PYDANTIC ----------------------------------- #

def pydantic_field(self) -> Any:
    """Build the pydantic type annotation for this column definition.

    The annotation itself comes from :meth:`_pydantic_field_inner`; this
    wrapper only adds the warning about custom checks before delegating.

    Returns:
        A pydantic-compatible type annotation. Structured constraints
        (e.g., min, max, regex) are included where the column type
        translates them; custom checks never are.

    Warning:
        If a custom check was supplied via the `check` parameter, a
        ``UserWarning`` is emitted (not raised as an exception) because the
        check is not part of the returned annotation.
    """
    import warnings

    has_custom_check = self.check is not None
    if has_custom_check:
        # stacklevel=2 attributes the warning to whoever called `pydantic_field`.
        warnings.warn(
            f"Custom checks for column '{self.name or self.__class__.__name__}' "
            "are not translated to pydantic constraints.",
            UserWarning,
            stacklevel=2,
        )

    inner_annotation = self._pydantic_field_inner()
    return inner_annotation

@abstractmethod
def _pydantic_field_inner(self) -> Any:
    """Return the pydantic type annotation for this concrete column type.

    This is the hook that :meth:`pydantic_field` delegates to after it has
    handled the custom-check warning. Subclasses implement it to return the
    actual pydantic field type.
    """

# ------------------------------------ HELPER ------------------------------------ #

@property
Expand Down
7 changes: 7 additions & 0 deletions dataframely/columns/any.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,10 @@ def pyarrow_dtype(self) -> pa.DataType:

def _sample_unchecked(self, generator: Generator, n: int) -> pl.Series:
    """Return a series of ``n`` nulls with dtype ``pl.Null``.

    The ``generator`` argument is accepted for interface compatibility but is
    not used: the result contains no random content.
    """
    null_series = pl.repeat(None, n, dtype=pl.Null, eager=True)
    return null_series

def _pydantic_field_inner(self) -> type:
    """Return :data:`typing.Any` as the pydantic annotation for an Any column."""
    import typing

    # Any columns are always nullable, so no explicit ``None`` union is added.
    return typing.Any
25 changes: 25 additions & 0 deletions dataframely/columns/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,3 +148,28 @@ def as_dict(self, expr: pl.Expr) -> dict[str, Any]:
def from_dict(cls, data: dict[str, Any]) -> Self:
data["inner"] = column_from_dict(data["inner"])
return super().from_dict(data)

def _pydantic_field_inner(self) -> type:
    """Return the pydantic annotation for an Array column.

    The array is represented as ``list[<inner annotation>]``, where the inner
    annotation is obtained from ``self.inner.pydantic_field()``. A
    ``UserWarning`` is always emitted because the translation to a plain list
    is only partial.

    Returns:
        ``list[inner]`` for a non-nullable column, ``Union[list[inner], None]``
        for a nullable one.
    """
    import warnings
    from typing import Union

    warnings.warn(
        f"Array column '{self.name or self.__class__.__name__}' cannot be fully "
        "translated to pydantic. Using list as the base type.",
        UserWarning,
        stacklevel=3,
    )

    # Delegate to the inner column so its own constraints and warnings apply.
    element_annotation = self.inner.pydantic_field()
    list_annotation = list[element_annotation]  # type: ignore

    if not self.nullable:
        return list_annotation  # type: ignore
    return Union[list_annotation, None]  # type: ignore
8 changes: 8 additions & 0 deletions dataframely/columns/binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,11 @@ def _sample_unchecked(self, generator: Generator, n: int) -> pl.Series:
max_bytes=32,
null_probability=self._null_probability,
)

def _pydantic_field_inner(self) -> type:
    """Return the pydantic annotation for a binary column.

    Returns:
        ``bytes`` for a non-nullable column and ``Union[bytes, None]`` for a
        nullable one. The method itself never returns ``None``, which is why
        the return annotation does not include it.
    """
    if not self.nullable:
        return bytes

    from typing import Union

    return Union[bytes, None]  # type: ignore
8 changes: 8 additions & 0 deletions dataframely/columns/bool.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,11 @@ def pyarrow_dtype(self) -> pa.DataType:

def _sample_unchecked(self, generator: Generator, n: int) -> pl.Series:
    """Draw ``n`` boolean samples from ``generator``.

    The column's ``_null_probability`` is forwarded to
    ``generator.sample_bool`` as ``null_probability``.
    """
    null_probability = self._null_probability
    return generator.sample_bool(n, null_probability=null_probability)

def _pydantic_field_inner(self) -> type:
    """Return the pydantic annotation for a bool column.

    Returns:
        ``bool`` for a non-nullable column and ``Union[bool, None]`` for a
        nullable one. The method itself never returns ``None``, which is why
        the return annotation does not include it.
    """
    if not self.nullable:
        return bool

    from typing import Union

    return Union[bool, None]  # type: ignore
9 changes: 9 additions & 0 deletions dataframely/columns/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,12 @@ def _sample_unchecked(self, generator: Generator, n: int) -> pl.Series:
return generator.sample_string(
n, regex=r"[a-z]{1,2}", null_probability=self._null_probability
).cast(self.dtype)

def _pydantic_field_inner(self) -> type:
    """Return the pydantic annotation for a categorical column.

    Categorical values are represented as plain strings on the pydantic side.

    Returns:
        ``str`` for a non-nullable column and ``Union[str, None]`` for a
        nullable one. The method itself never returns ``None``, which is why
        the return annotation does not include it.
    """
    # Categorical is essentially a string type
    if not self.nullable:
        return str

    from typing import Union

    return Union[str, None]  # type: ignore
197 changes: 197 additions & 0 deletions dataframely/columns/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,50 @@ def _sample_unchecked(self, generator: Generator, n: int) -> pl.Series:
null_probability=self._null_probability,
)

def _pydantic_field_inner(self) -> type:
    """Return the pydantic annotation for a Date column.

    The ``min``/``min_exclusive``/``max``/``max_exclusive`` bounds that are set
    become ``ge``/``gt``/``le``/``lt`` arguments of a ``pydantic.Field``
    attached via ``Annotated``. A ``resolution`` constraint is not translated
    and only triggers a ``UserWarning``.

    Returns:
        ``datetime.date`` (possibly ``Annotated`` with bounds), wrapped in
        ``Union[..., None]`` when the column is nullable.
    """
    import datetime as dt
    import warnings
    from typing import Annotated, Union

    from dataframely._compat import pydantic

    # `resolution` is not translated, so the caller is told about it.
    if self.resolution is not None:
        warnings.warn(
            f"Date column '{self.name or self.__class__.__name__}' has a resolution "
            "constraint that cannot be translated to pydantic.",
            UserWarning,
            stacklevel=3,
        )

    # Pair every pydantic keyword with the column bound it represents and keep
    # only the bounds that were actually configured.
    bounds = (
        ("ge", self.min),
        ("gt", self.min_exclusive),
        ("le", self.max),
        ("lt", self.max_exclusive),
    )
    field_kwargs = {keyword: bound for keyword, bound in bounds if bound is not None}

    # Only wrap in `Annotated` when there is at least one bound to attach.
    annotation = (
        Annotated[dt.date, pydantic.Field(**field_kwargs)] if field_kwargs else dt.date
    )

    if not self.nullable:
        return annotation  # type: ignore
    return Union[annotation, None]  # type: ignore


@register
class Time(OrdinalMixin[dt.time], Column):
Expand Down Expand Up @@ -278,6 +322,50 @@ def _sample_unchecked(self, generator: Generator, n: int) -> pl.Series:
null_probability=self._null_probability,
)

def _pydantic_field_inner(self) -> type:
    """Return the pydantic annotation for a Time column.

    The ``min``/``min_exclusive``/``max``/``max_exclusive`` bounds that are set
    become ``ge``/``gt``/``le``/``lt`` arguments of a ``pydantic.Field``
    attached via ``Annotated``. A ``resolution`` constraint is not translated
    and only triggers a ``UserWarning``.

    Returns:
        ``datetime.time`` (possibly ``Annotated`` with bounds), wrapped in
        ``Union[..., None]`` when the column is nullable.
    """
    import datetime as dt
    import warnings
    from typing import Annotated, Union

    from dataframely._compat import pydantic

    # `resolution` is not translated, so the caller is told about it.
    if self.resolution is not None:
        warnings.warn(
            f"Time column '{self.name or self.__class__.__name__}' has a resolution "
            "constraint that cannot be translated to pydantic.",
            UserWarning,
            stacklevel=3,
        )

    # Pair every pydantic keyword with the column bound it represents and keep
    # only the bounds that were actually configured.
    bounds = (
        ("ge", self.min),
        ("gt", self.min_exclusive),
        ("le", self.max),
        ("lt", self.max_exclusive),
    )
    field_kwargs = {keyword: bound for keyword, bound in bounds if bound is not None}

    # Only wrap in `Annotated` when there is at least one bound to attach.
    annotation = (
        Annotated[dt.time, pydantic.Field(**field_kwargs)] if field_kwargs else dt.time
    )

    if not self.nullable:
        return annotation  # type: ignore
    return Union[annotation, None]  # type: ignore


@register
class Datetime(OrdinalMixin[dt.datetime], Column):
Expand Down Expand Up @@ -425,6 +513,64 @@ def _attributes_match(
return lhs.utcoffset(now) == rhs.utcoffset(now)
return super()._attributes_match(lhs, rhs, name, column_expr)

def _pydantic_field_inner(self) -> type:
    """Return the pydantic annotation for a Datetime column.

    The ``min``/``min_exclusive``/``max``/``max_exclusive`` bounds that are set
    become ``ge``/``gt``/``le``/``lt`` arguments of a ``pydantic.Field``
    attached via ``Annotated``. Three settings are not translated and each
    triggers its own ``UserWarning``: a ``resolution``, a ``time_zone``, and a
    ``time_unit`` other than ``"us"``.

    Returns:
        ``datetime.datetime`` (possibly ``Annotated`` with bounds), wrapped in
        ``Union[..., None]`` when the column is nullable.
    """
    import datetime as dt
    import warnings
    from typing import Annotated, Union

    from dataframely._compat import pydantic

    # Report every setting that is dropped by the translation, one warning each.
    if self.resolution is not None:
        warnings.warn(
            f"Datetime column '{self.name or self.__class__.__name__}' has a resolution "
            "constraint that cannot be translated to pydantic.",
            UserWarning,
            stacklevel=3,
        )
    if self.time_zone is not None:
        warnings.warn(
            f"Datetime column '{self.name or self.__class__.__name__}' has a time_zone "
            "constraint that cannot be translated to pydantic.",
            UserWarning,
            stacklevel=3,
        )
    if self.time_unit != "us":
        warnings.warn(
            f"Datetime column '{self.name or self.__class__.__name__}' has a time_unit "
            "constraint that cannot be translated to pydantic.",
            UserWarning,
            stacklevel=3,
        )

    # Pair every pydantic keyword with the column bound it represents and keep
    # only the bounds that were actually configured.
    bounds = (
        ("ge", self.min),
        ("gt", self.min_exclusive),
        ("le", self.max),
        ("lt", self.max_exclusive),
    )
    field_kwargs = {keyword: bound for keyword, bound in bounds if bound is not None}

    # Only wrap in `Annotated` when there is at least one bound to attach.
    annotation = (
        Annotated[dt.datetime, pydantic.Field(**field_kwargs)]
        if field_kwargs
        else dt.datetime
    )

    if not self.nullable:
        return annotation  # type: ignore
    return Union[annotation, None]  # type: ignore


@register
class Duration(OrdinalMixin[dt.timedelta], Column):
Expand Down Expand Up @@ -550,6 +696,57 @@ def _sample_unchecked(self, generator: Generator, n: int) -> pl.Series:
null_probability=self._null_probability,
)

def _pydantic_field_inner(self) -> type:
    """Return the pydantic annotation for a Duration column.

    The ``min``/``min_exclusive``/``max``/``max_exclusive`` bounds that are set
    become ``ge``/``gt``/``le``/``lt`` arguments of a ``pydantic.Field``
    attached via ``Annotated``. Two settings are not translated and each
    triggers its own ``UserWarning``: a ``resolution`` and a ``time_unit``
    other than ``"us"``.

    Returns:
        ``datetime.timedelta`` (possibly ``Annotated`` with bounds), wrapped in
        ``Union[..., None]`` when the column is nullable.
    """
    import datetime as dt
    import warnings
    from typing import Annotated, Union

    from dataframely._compat import pydantic

    # Report every setting that is dropped by the translation, one warning each.
    if self.resolution is not None:
        warnings.warn(
            f"Duration column '{self.name or self.__class__.__name__}' has a resolution "
            "constraint that cannot be translated to pydantic.",
            UserWarning,
            stacklevel=3,
        )
    if self.time_unit != "us":
        warnings.warn(
            f"Duration column '{self.name or self.__class__.__name__}' has a time_unit "
            "constraint that cannot be translated to pydantic.",
            UserWarning,
            stacklevel=3,
        )

    # Pair every pydantic keyword with the column bound it represents and keep
    # only the bounds that were actually configured.
    bounds = (
        ("ge", self.min),
        ("gt", self.min_exclusive),
        ("le", self.max),
        ("lt", self.max_exclusive),
    )
    field_kwargs = {keyword: bound for keyword, bound in bounds if bound is not None}

    # Only wrap in `Annotated` when there is at least one bound to attach.
    annotation = (
        Annotated[dt.timedelta, pydantic.Field(**field_kwargs)]
        if field_kwargs
        else dt.timedelta
    )

    if not self.nullable:
        return annotation  # type: ignore
    return Union[annotation, None]  # type: ignore


# --------------------------------------- UTILS -------------------------------------- #

Expand Down
Loading
Loading