Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion packages/uipath-platform/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "uipath-platform"
version = "0.1.60"
version = "0.1.61"
description = "HTTP client library for programmatic access to UiPath Platform"
readme = { file = "README.md", content-type = "text/markdown" }
requires-python = ">=3.11"
Expand Down
5 changes: 5 additions & 0 deletions packages/uipath-platform/src/uipath/platform/_uipath.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
ProcessesService,
QueuesService,
)
from .pii_detection import PiiDetectionService
from .resource_catalog import ResourceCatalogService
from .semantic_proxy import SemanticProxyService

Expand Down Expand Up @@ -184,6 +185,10 @@ def orchestrator_setup(self) -> OrchestratorSetupService:
def automation_ops(self) -> AutomationOpsService:
return AutomationOpsService(self._config, self._execution_context)

@property
def pii_detection(self) -> PiiDetectionService:
    """Return a ``PiiDetectionService`` bound to this client's settings.

    A new service object is constructed on every access from the client's
    ``_config`` and ``_execution_context``.
    """
    service = PiiDetectionService(self._config, self._execution_context)
    return service
Comment thread
yashwagle1 marked this conversation as resolved.

@property
def semantic_proxy(self) -> SemanticProxyService:
    """Return a ``SemanticProxyService`` bound to this client's settings.

    A new service object is constructed on every access from the client's
    ``_config`` and ``_execution_context``.
    """
    service = SemanticProxyService(self._config, self._execution_context)
    return service
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""PiiDetection service package.

Provides the ``PiiDetectionService`` client, Pydantic request/response models for
the PII detection endpoint, and utilities for rehydrating masked text with
original PII values after LLM processing.
"""

from ._pii_detection_service import PiiDetectionService
from .pii_detection import (
PiiDetectionRequest,
PiiDetectionResponse,
PiiDocument,
PiiDocumentResult,
PiiEntity,
PiiEntityThreshold,
PiiFile,
PiiFileResult,
)
from .pii_utilities import (
rehydrate_from_pii_entities,
rehydrate_from_pii_response,
)

# Public API of the package: the service client, the request/response models,
# and the two rehydration helpers imported above.
__all__ = [
"PiiDetectionRequest",
"PiiDetectionResponse",
"PiiDetectionService",
"PiiDocument",
"PiiDocumentResult",
"PiiEntity",
"PiiEntityThreshold",
"PiiFile",
"PiiFileResult",
"rehydrate_from_pii_entities",
"rehydrate_from_pii_response",
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
"""PiiDetection service for UiPath Platform.

Provides methods for detecting PII in documents and files.
"""

from uipath.core.tracing import traced

from ..common._base_service import BaseService
from ..common._config import UiPathApiConfig
from ..common._execution_context import UiPathExecutionContext
from ..common._models import Endpoint, RequestSpec
from .pii_detection import PiiDetectionRequest, PiiDetectionResponse

# Relative path of the PII detection endpoint; the service below sends its
# requests to this path with ``scoped="tenant"``.
_PII_DETECTION_ENDPOINT = Endpoint("llmopstenant_/api/pii-detection")

# PII detection over documents/files can be slow, so override the default
# httpx client timeout (30s) with a longer per-request timeout.
# The value is forwarded as ``timeout=`` on every detection request.
_PII_DETECTION_TIMEOUT = 290.0


class PiiDetectionService(BaseService):
    """Client for the UiPath PII detection endpoint.

    Offers a blocking call and an ``async`` twin. Both POST a
    ``PiiDetectionRequest`` to the tenant-scoped endpoint and parse the JSON
    reply into a ``PiiDetectionResponse``.
    """

    def __init__(
        self,
        config: UiPathApiConfig,
        execution_context: UiPathExecutionContext,
    ) -> None:
        # Nothing service-specific to set up; defer entirely to BaseService.
        super().__init__(config=config, execution_context=execution_context)

    @traced(name="pii_detection_detect_pii", run_type="uipath")
    def detect_pii(self, request: PiiDetectionRequest) -> PiiDetectionResponse:
        """Run PII detection over the documents and/or files in ``request``.

        Args:
            request: The PII detection request payload.

        Returns:
            The parsed PII detection response.
        """
        request_spec = self._pii_detection_spec(request)
        http_response = self.request(
            request_spec.method,
            url=request_spec.endpoint,
            json=request_spec.json,
            headers=request_spec.headers,
            scoped="tenant",
            # Detection can outlast the client's default timeout.
            timeout=_PII_DETECTION_TIMEOUT,
        )
        payload = http_response.json()
        return PiiDetectionResponse.model_validate(payload)

    @traced(name="pii_detection_detect_pii", run_type="uipath")
    async def detect_pii_async(
        self, request: PiiDetectionRequest
    ) -> PiiDetectionResponse:
        """Run PII detection over ``request`` without blocking (async variant).

        Args:
            request: The PII detection request payload.

        Returns:
            The parsed PII detection response.
        """
        request_spec = self._pii_detection_spec(request)
        http_response = await self.request_async(
            request_spec.method,
            url=request_spec.endpoint,
            json=request_spec.json,
            headers=request_spec.headers,
            scoped="tenant",
            # Detection can outlast the client's default timeout.
            timeout=_PII_DETECTION_TIMEOUT,
        )
        payload = http_response.json()
        return PiiDetectionResponse.model_validate(payload)

    def _pii_detection_spec(self, request: PiiDetectionRequest) -> RequestSpec:
        """Build the POST ``RequestSpec`` shared by the sync and async calls.

        The body is the request dumped with field aliases as keys and with
        ``None``-valued fields left out.
        """
        body = request.model_dump(by_alias=True, exclude_none=True)
        return RequestSpec(
            method="POST",
            endpoint=_PII_DETECTION_ENDPOINT,
            json=body,
        )
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
"""Public Pydantic models for the PiiDetection service."""

from typing import Optional

from pydantic import BaseModel, ConfigDict, Field


class PiiDocument(BaseModel):
    """One piece of text submitted for PII scanning."""

    # Identifier of the document within the request.
    id: str
    # Role label attached to the document (meaning is defined by the service).
    role: str
    # The text to scan.
    document: str


class PiiFile(BaseModel):
    """Reference to a file that should be scanned for PII.

    Fields serialize under their camelCase aliases; ``populate_by_name`` lets
    callers construct the model with the snake_case names as well.
    """

    model_config = ConfigDict(populate_by_name=True)

    # Name of the file.
    file_name: str = Field(alias="fileName")
    # URL of the file.
    file_url: str = Field(alias="fileUrl")
    # Type of the file (format of the value is defined by the service).
    file_type: str = Field(alias="fileType")


class PiiEntityThreshold(BaseModel):
    """Confidence threshold override for one PII entity category.

    Note that the wire aliases for this model are hyphenated, unlike the
    camelCase aliases used by the other models in this module.
    """

    model_config = ConfigDict(populate_by_name=True)

    # Entity category the override applies to.
    category: str = Field(alias="pii-entity-category")
    # Threshold to use for that category.
    confidence_threshold: float = Field(alias="pii-entity-confidence-threshold")


class PiiDetectionRequest(BaseModel):
    """Body sent to the PII detection endpoint.

    Every field is optional and defaults to ``None``.
    """

    model_config = ConfigDict(populate_by_name=True)

    # Text documents to scan.
    documents: Optional[list[PiiDocument]] = None
    # File references to scan.
    files: Optional[list[PiiFile]] = None
    # Language of the content (code format is defined by the service).
    language_code: Optional[str] = Field(default=None, alias="languageCode")
    # Request-wide confidence threshold.
    confidence_threshold: Optional[float] = Field(
        default=None, alias="confidenceThreshold"
    )
    # Per-category threshold overrides.
    entity_thresholds: Optional[list[PiiEntityThreshold]] = Field(
        default=None, alias="entityThresholds"
    )


class PiiEntity(BaseModel):
    """One PII occurrence reported by the detection endpoint."""

    model_config = ConfigDict(populate_by_name=True)

    # The original PII value.
    pii_text: str = Field(alias="piiText")
    # Placeholder that stands in for the value in masked text, e.g. "[Person-1]".
    replacement_text: str = Field(alias="replacementText")
    # Category of the detected entity.
    pii_type: str = Field(alias="piiType")
    # Position of the entity (presumably an offset into the scanned text — confirm).
    offset: int
    # Confidence reported for this detection.
    confidence_score: float = Field(alias="confidenceScore")


class PiiDocumentResult(BaseModel):
    """Detection outcome for one submitted document."""

    model_config = ConfigDict(populate_by_name=True)

    # Identifier of the document this result belongs to.
    id: str
    # Role label of the document.
    role: str
    # Document text after masking (presumably with placeholders substituted — confirm).
    masked_document: str = Field(alias="maskedDocument")
    # Document text before masking.
    initial_document: str = Field(alias="initialDocument")
    # Entities found in the document; empty list when the field is absent.
    pii_entities: list[PiiEntity] = Field(default_factory=list, alias="piiEntities")


class PiiFileResult(BaseModel):
    """Detection outcome for one submitted file.

    ``file_url`` here is the URL of the redacted file.
    """

    model_config = ConfigDict(populate_by_name=True)

    # Name of the file this result belongs to.
    file_name: str = Field(alias="fileName")
    # URL of the redacted file.
    file_url: str = Field(alias="fileUrl")
    # Entities found in the file; empty list when the field is absent.
    pii_entities: list[PiiEntity] = Field(default_factory=list, alias="piiEntities")


class PiiDetectionResponse(BaseModel):
    """Body returned by the PII detection endpoint.

    Both lists default to empty when the corresponding key is missing.
    """

    model_config = ConfigDict(populate_by_name=True)

    # Per-document results.
    response: list[PiiDocumentResult] = Field(default_factory=list)
    # Per-file results.
    files: list[PiiFileResult] = Field(default_factory=list)
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
"""Utility methods for working with PII data.

Python port of UiPath.SemanticProxy.Client.PiiUtilities (C#).
"""

import re
from typing import Callable, Iterable

from .pii_detection import PiiDetectionResponse, PiiEntity


def rehydrate_from_pii_entities(
    masked_text: str, pii_entities: Iterable[PiiEntity]
) -> str:
    """Rehydrate masked text by replacing PII placeholders with original values.

    Placeholders (e.g. ``[Person-1]``) are matched case-insensitively and replaced
    with the corresponding original PII text. Variants without the surrounding
    brackets (e.g. ``Person-1``) are replaced too, in case the LLM stripped the
    brackets in its output.

    All placeholders are substituted in a single pass over ``masked_text``, so
    text inserted for one entity is never re-scanned and rewritten by another
    entity's placeholder (e.g. an organisation whose real name contains
    ``Person-1``).

    Args:
        masked_text: The masked text with PII placeholders.
        pii_entities: The PII entities containing the original values. Entities
            with an empty ``replacement_text`` or ``pii_text`` are ignored. When
            several entities share a placeholder, the first one wins.

    Returns:
        The rehydrated text with original PII values. ``masked_text`` is
        returned unchanged when it is empty or no usable entity is given.
    """
    if not masked_text:
        return masked_text

    # Longest placeholder first (stable for ties), so that the first entity
    # seen for a placeholder is the one that claims it below.
    usable = sorted(
        (e for e in pii_entities if e.replacement_text and e.pii_text),
        key=lambda e: len(e.replacement_text),
        reverse=True,
    )

    # Lower-cased variant -> (variant text to match, original PII value).
    # The lower-cased key only de-duplicates; matching uses the variant text.
    variants: dict[str, tuple[str, str]] = {}
    for entity in usable:
        placeholder = entity.replacement_text
        variants.setdefault(placeholder.lower(), (placeholder, entity.pii_text))
        if placeholder.startswith("[") and placeholder.endswith("]"):
            bare = placeholder[1:-1]
            # "[]" has no bare form; an empty pattern would match between
            # every character of the text.
            if bare:
                variants.setdefault(bare.lower(), (bare, entity.pii_text))

    if not variants:
        return masked_text

    # Longest variant first so "Person-10" is tried before "Person-1" at any
    # given position of the text.
    ordered = sorted(variants.values(), key=lambda pair: len(pair[0]), reverse=True)
    originals = [original for _, original in ordered]
    # One capturing group per variant: ``lastindex`` then identifies which
    # variant matched without re-normalising the matched text.
    pattern = re.compile(
        "|".join(f"({re.escape(variant)})" for variant, _ in ordered),
        flags=re.IGNORECASE,
    )

    # A function replacement is inserted literally, so backslashes in the
    # original PII value are not interpreted as regex backreferences.
    return pattern.sub(lambda match: originals[match.lastindex - 1], masked_text)


def _literal_replacer(replacement: str) -> Callable[[re.Match[str]], str]:
    """Wrap ``replacement`` in a callable usable as the ``repl`` of ``re.sub``.

    The returned callable ignores the match it is given and always yields
    ``replacement`` unchanged, so backslash sequences in it are never treated
    as regex backreferences.
    """
    return lambda _match: replacement


def rehydrate_from_pii_response(
    masked_text: str, response: PiiDetectionResponse
) -> str:
    """Rehydrate masked text with every entity carried by a detection response.

    Entities are gathered from ``response.response`` (document results) first
    and then from ``response.files`` (file results), and the combined list is
    handed to ``rehydrate_from_pii_entities``.

    Args:
        masked_text: The masked text with PII placeholders.
        response: The PII detection response containing entities to rehydrate.

    Returns:
        The rehydrated text with original PII values.
    """
    collected: list[PiiEntity] = []
    # Documents before files, preserving the order within each group.
    for results in (response.response, response.files):
        for result in results:
            collected.extend(result.pii_entities)
    return rehydrate_from_pii_entities(masked_text, collected)
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
Python port of UiPath.SemanticProxy.Client.PiiUtilities (C#).
"""

import json
import re
from typing import Callable, Iterable

Expand Down Expand Up @@ -42,13 +41,12 @@ def rehydrate_from_pii_entities(
for entity in entities:
if not entity.replacement_text or not entity.pii_text:
continue
escaped_pii = _add_escape_characters(entity.pii_text)
# Replace the full placeholder (with brackets) case-insensitively.
# ``_literal_replacer`` bypasses regex backreference interpretation in the
# replacement string.
rehydrated = re.sub(
re.escape(entity.replacement_text),
_literal_replacer(escaped_pii),
_literal_replacer(entity.pii_text),
rehydrated,
flags=re.IGNORECASE,
)
Expand All @@ -59,7 +57,7 @@ def rehydrate_from_pii_entities(
no_brackets = entity.replacement_text[1:-1]
rehydrated = re.sub(
re.escape(no_brackets),
_literal_replacer(escaped_pii),
_literal_replacer(entity.pii_text),
rehydrated,
flags=re.IGNORECASE,
)
Expand Down Expand Up @@ -98,18 +96,3 @@ def rehydrate_from_pii_response(
for file in response.files:
entities.extend(file.pii_entities)
return rehydrate_from_pii_entities(masked_text, entities)


def _add_escape_characters(text: str) -> str:
    """Return ``text`` with special characters escaped the way JSON escapes them.

    The text is serialized as a JSON string (non-ASCII characters are kept
    as-is) and the surrounding double quotes are dropped. Mirrors the C#
    ``AddEscapeCharacters`` helper.

    Falsy input yields ``""``; if serialization fails, the input is returned
    unchanged.
    """
    if text:
        try:
            quoted = json.dumps(text, ensure_ascii=False)
        except (TypeError, ValueError):
            return text
        # Drop the opening and closing quote added by the serializer.
        return quoted[1:-1]
    return ""
Loading
Loading