Quantco · Oliver Borchert (borchero) · Apr 13, 2026 · Apr 9, 2026 · Apr 9, 2026 · Apr 9, 2026
@@ -222,6 +222,35 @@ def pyarrow_field(self, name: str) -> pa.Field:
     def pyarrow_dtype(self) -> pa.DataType:
         """The :mod:`pyarrow` dtype equivalent of this column data type."""
 
+    # ----------------------------------- PYDANTIC ----------------------------------- #
+
+    def pydantic_field(self) -> Any:
+        """Build the pydantic type annotation that mirrors this column definition.
+
+        Returns:
+            The result of :meth:`_pydantic_field_inner`: a pydantic-compatible type
+            annotation carrying the column's structured constraints (e.g., min,
+            max, regex).
+
+        Warning:
+            A custom ``check`` is not translated. If one is set, a
+            :class:`UserWarning` is emitted and the check is absent from the
+            returned annotation.
+        """
+        import warnings
+
+        has_custom_check = self.check is not None
+        if has_custom_check:
+            message = (
+                f"Custom checks for column '{self.name or self.__class__.__name__}' "
+                "are not translated to pydantic constraints."
+            )
+            # stacklevel=2 attributes the warning to whoever called this method.
+            warnings.warn(message, UserWarning, stacklevel=2)
+
+        return self._pydantic_field_inner()
+
+    @abstractmethod
+    def _pydantic_field_inner(self) -> Any:
+        """Return the pydantic field type for this concrete column type.
+
+        Subclasses implement this hook. :meth:`pydantic_field` calls it after its
+        custom-check warning logic and returns the result unchanged.
+
+        Returns:
+            The pydantic-compatible type annotation for the column.
+        """
+
     # ------------------------------------ HELPER ------------------------------------ #
 
     @property

@@ -79,3 +79,10 @@ def pyarrow_dtype(self) -> pa.DataType:
 
     def _sample_unchecked(self, generator: Generator, n: int) -> pl.Series:
         return pl.repeat(None, n, dtype=pl.Null, eager=True)
+
+    def _pydantic_field_inner(self) -> type:
+        """Produce the pydantic annotation for an ``Any`` column.
+
+        Returns:
+            :data:`typing.Any`. Nullability needs no extra handling because Any
+            columns are always nullable.
+        """
+        import typing
+
+        return typing.Any
@@ -148,3 +148,28 @@ def as_dict(self, expr: pl.Expr) -> dict[str, Any]:
     def from_dict(cls, data: dict[str, Any]) -> Self:
         data["inner"] = column_from_dict(data["inner"])
         return super().from_dict(data)
+
+    def _pydantic_field_inner(self) -> type:
+        """Produce the pydantic annotation for an ``Array`` column.
+
+        A :class:`UserWarning` is emitted on every call because the array is
+        represented as a plain ``list`` of the inner column's pydantic type.
+
+        Returns:
+            ``list[<inner type>]``, placed in a union with ``None`` if the column
+            is nullable.
+        """
+        import warnings
+        from typing import Optional
+
+        # stacklevel=3 attributes the warning two frames above this method --
+        # presumably the code that called ``pydantic_field``.
+        warnings.warn(
+            f"Array column '{self.name or self.__class__.__name__}' cannot be fully "
+            "translated to pydantic. Using list as the base type.",
+            UserWarning,
+            stacklevel=3,
+        )
+
+        # The inner column goes through its public entry point, so it may emit
+        # warnings of its own.
+        element_type = self.inner.pydantic_field()
+        list_type = list[element_type]  # type: ignore
+
+        return Optional[list_type] if self.nullable else list_type  # type: ignore
@@ -38,3 +38,11 @@ def _sample_unchecked(self, generator: Generator, n: int) -> pl.Series:
             max_bytes=32,
             null_probability=self._null_probability,
         )
+
+    def _pydantic_field_inner(self) -> type[bytes] | None:
+        """Produce the pydantic annotation for a binary column.
+
+        Returns:
+            ``bytes``, or a union of ``bytes`` and ``None`` if the column is
+            nullable.
+        """
+        from typing import Optional
+
+        # Optional[bytes] is the same object as Union[bytes, None].
+        return Optional[bytes] if self.nullable else bytes  # type: ignore
@@ -29,3 +29,11 @@ def pyarrow_dtype(self) -> pa.DataType:
 
     def _sample_unchecked(self, generator: Generator, n: int) -> pl.Series:
         return generator.sample_bool(n, null_probability=self._null_probability)
+
+    def _pydantic_field_inner(self) -> type[bool] | None:
+        """Produce the pydantic annotation for a bool column.
+
+        Returns:
+            ``bool``, or a union of ``bool`` and ``None`` if the column is nullable.
+        """
+        from typing import Optional
+
+        # Optional[bool] is the same object as Union[bool, None].
+        return Optional[bool] if self.nullable else bool  # type: ignore
@@ -76,3 +76,12 @@ def _sample_unchecked(self, generator: Generator, n: int) -> pl.Series:
         return generator.sample_string(
             n, regex=r"[a-z]{1,2}", null_probability=self._null_probability
         ).cast(self.dtype)
+
+    def _pydantic_field_inner(self) -> type[str] | None:
+        """Produce the pydantic annotation for a categorical column.
+
+        Categorical values are modelled as plain strings.
+
+        Returns:
+            ``str``, or a union of ``str`` and ``None`` if the column is nullable.
+        """
+        from typing import Optional
+
+        # Optional[str] is the same object as Union[str, None].
+        return Optional[str] if self.nullable else str  # type: ignore
@@ -149,6 +149,50 @@ def _sample_unchecked(self, generator: Generator, n: int) -> pl.Series:
             null_probability=self._null_probability,
         )
 
+    def _pydantic_field_inner(self) -> type:
+        """Produce the pydantic annotation for a ``Date`` column.
+
+        The ``min`` / ``min_exclusive`` / ``max`` / ``max_exclusive`` bounds are
+        passed to ``pydantic.Field`` as ``ge`` / ``gt`` / ``le`` / ``lt``. A
+        ``resolution`` is not translated; a :class:`UserWarning` is emitted if one
+        is set.
+
+        Returns:
+            ``datetime.date``, wrapped in ``Annotated`` with a pydantic ``Field`` if
+            any bound is set, and placed in a union with ``None`` if the column is
+            nullable.
+        """
+        import datetime as dt
+        import warnings
+        from typing import Annotated, Optional
+
+        from dataframely._compat import pydantic
+
+        if self.resolution is not None:
+            # stacklevel=3 attributes the warning two frames above this method --
+            # presumably the code that called ``pydantic_field``.
+            warnings.warn(
+                f"Date column '{self.name or self.__class__.__name__}' has a resolution "
+                "constraint that cannot be translated to pydantic.",
+                UserWarning,
+                stacklevel=3,
+            )
+
+        # Candidate ``Field`` keyword arguments; unset bounds are dropped below.
+        candidate_bounds = {
+            "ge": self.min,
+            "gt": self.min_exclusive,
+            "le": self.max,
+            "lt": self.max_exclusive,
+        }
+        field_kwargs = {
+            key: bound for key, bound in candidate_bounds.items() if bound is not None
+        }
+
+        # Only wrap in ``Annotated`` when there is at least one constraint.
+        field_type = (
+            Annotated[dt.date, pydantic.Field(**field_kwargs)]
+            if field_kwargs
+            else dt.date
+        )
+
+        return Optional[field_type] if self.nullable else field_type  # type: ignore
+
 
 @register
 class Time(OrdinalMixin[dt.time], Column):
@@ -278,6 +322,50 @@ def _sample_unchecked(self, generator: Generator, n: int) -> pl.Series:
             null_probability=self._null_probability,
         )
 
+    def _pydantic_field_inner(self) -> type:
+        """Produce the pydantic annotation for a ``Time`` column.
+
+        The ``min`` / ``min_exclusive`` / ``max`` / ``max_exclusive`` bounds are
+        passed to ``pydantic.Field`` as ``ge`` / ``gt`` / ``le`` / ``lt``. A
+        ``resolution`` is not translated; a :class:`UserWarning` is emitted if one
+        is set.
+
+        Returns:
+            ``datetime.time``, wrapped in ``Annotated`` with a pydantic ``Field`` if
+            any bound is set, and placed in a union with ``None`` if the column is
+            nullable.
+        """
+        import datetime as dt
+        import warnings
+        from typing import Annotated, Optional
+
+        from dataframely._compat import pydantic
+
+        if self.resolution is not None:
+            # stacklevel=3 attributes the warning two frames above this method --
+            # presumably the code that called ``pydantic_field``.
+            warnings.warn(
+                f"Time column '{self.name or self.__class__.__name__}' has a resolution "
+                "constraint that cannot be translated to pydantic.",
+                UserWarning,
+                stacklevel=3,
+            )
+
+        # Candidate ``Field`` keyword arguments; unset bounds are dropped below.
+        candidate_bounds = {
+            "ge": self.min,
+            "gt": self.min_exclusive,
+            "le": self.max,
+            "lt": self.max_exclusive,
+        }
+        field_kwargs = {
+            key: bound for key, bound in candidate_bounds.items() if bound is not None
+        }
+
+        # Only wrap in ``Annotated`` when there is at least one constraint.
+        field_type = (
+            Annotated[dt.time, pydantic.Field(**field_kwargs)]
+            if field_kwargs
+            else dt.time
+        )
+
+        return Optional[field_type] if self.nullable else field_type  # type: ignore
+
 
 @register
 class Datetime(OrdinalMixin[dt.datetime], Column):
@@ -425,6 +513,64 @@ def _attributes_match(
             return lhs.utcoffset(now) == rhs.utcoffset(now)
         return super()._attributes_match(lhs, rhs, name, column_expr)
 
+    def _pydantic_field_inner(self) -> type:
+        """Produce the pydantic annotation for a ``Datetime`` column.
+
+        The ``min`` / ``min_exclusive`` / ``max`` / ``max_exclusive`` bounds are
+        passed to ``pydantic.Field`` as ``ge`` / ``gt`` / ``le`` / ``lt``. A
+        ``resolution``, a ``time_zone`` and a ``time_unit`` other than ``"us"`` are
+        not translated; each one that applies emits its own :class:`UserWarning`.
+
+        Returns:
+            ``datetime.datetime``, wrapped in ``Annotated`` with a pydantic ``Field``
+            if any bound is set, and placed in a union with ``None`` if the column
+            is nullable.
+        """
+        import datetime as dt
+        import warnings
+        from typing import Annotated, Optional
+
+        from dataframely._compat import pydantic
+
+        # Each warning uses stacklevel=3, which attributes it two frames above this
+        # method -- presumably the code that called ``pydantic_field``.
+        has_resolution = self.resolution is not None
+        if has_resolution:
+            warnings.warn(
+                f"Datetime column '{self.name or self.__class__.__name__}' has a resolution "
+                "constraint that cannot be translated to pydantic.",
+                UserWarning,
+                stacklevel=3,
+            )
+        has_time_zone = self.time_zone is not None
+        if has_time_zone:
+            warnings.warn(
+                f"Datetime column '{self.name or self.__class__.__name__}' has a time_zone "
+                "constraint that cannot be translated to pydantic.",
+                UserWarning,
+                stacklevel=3,
+            )
+        has_custom_time_unit = self.time_unit != "us"
+        if has_custom_time_unit:
+            warnings.warn(
+                f"Datetime column '{self.name or self.__class__.__name__}' has a time_unit "
+                "constraint that cannot be translated to pydantic.",
+                UserWarning,
+                stacklevel=3,
+            )
+
+        # Candidate ``Field`` keyword arguments; unset bounds are dropped below.
+        candidate_bounds = {
+            "ge": self.min,
+            "gt": self.min_exclusive,
+            "le": self.max,
+            "lt": self.max_exclusive,
+        }
+        field_kwargs = {
+            key: bound for key, bound in candidate_bounds.items() if bound is not None
+        }
+
+        # Only wrap in ``Annotated`` when there is at least one constraint.
+        field_type = (
+            Annotated[dt.datetime, pydantic.Field(**field_kwargs)]
+            if field_kwargs
+            else dt.datetime
+        )
+
+        return Optional[field_type] if self.nullable else field_type  # type: ignore
+
 
 @register
 class Duration(OrdinalMixin[dt.timedelta], Column):
@@ -550,6 +696,57 @@ def _sample_unchecked(self, generator: Generator, n: int) -> pl.Series:
             null_probability=self._null_probability,
         )
 
+    def _pydantic_field_inner(self) -> type:
+        """Produce the pydantic annotation for a ``Duration`` column.
+
+        The ``min`` / ``min_exclusive`` / ``max`` / ``max_exclusive`` bounds are
+        passed to ``pydantic.Field`` as ``ge`` / ``gt`` / ``le`` / ``lt``. A
+        ``resolution`` and a ``time_unit`` other than ``"us"`` are not translated;
+        each one that applies emits its own :class:`UserWarning`.
+
+        Returns:
+            ``datetime.timedelta``, wrapped in ``Annotated`` with a pydantic
+            ``Field`` if any bound is set, and placed in a union with ``None`` if
+            the column is nullable.
+        """
+        import datetime as dt
+        import warnings
+        from typing import Annotated, Optional
+
+        from dataframely._compat import pydantic
+
+        # Each warning uses stacklevel=3, which attributes it two frames above this
+        # method -- presumably the code that called ``pydantic_field``.
+        has_resolution = self.resolution is not None
+        if has_resolution:
+            warnings.warn(
+                f"Duration column '{self.name or self.__class__.__name__}' has a resolution "
+                "constraint that cannot be translated to pydantic.",
+                UserWarning,
+                stacklevel=3,
+            )
+        has_custom_time_unit = self.time_unit != "us"
+        if has_custom_time_unit:
+            warnings.warn(
+                f"Duration column '{self.name or self.__class__.__name__}' has a time_unit "
+                "constraint that cannot be translated to pydantic.",
+                UserWarning,
+                stacklevel=3,
+            )
+
+        # Candidate ``Field`` keyword arguments; unset bounds are dropped below.
+        candidate_bounds = {
+            "ge": self.min,
+            "gt": self.min_exclusive,
+            "le": self.max,
+            "lt": self.max_exclusive,
+        }
+        field_kwargs = {
+            key: bound for key, bound in candidate_bounds.items() if bound is not None
+        }
+
+        # Only wrap in ``Annotated`` when there is at least one constraint.
+        field_type = (
+            Annotated[dt.timedelta, pydantic.Field(**field_kwargs)]
+            if field_kwargs
+            else dt.timedelta
+        )
+
+        return Optional[field_type] if self.nullable else field_type  # type: ignore
+
 
 # --------------------------------------- UTILS -------------------------------------- #