-
Notifications
You must be signed in to change notification settings - Fork 26
move pii masking service to llmops #1701
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
+591
−25
Merged
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
36 changes: 36 additions & 0 deletions
36
packages/uipath-platform/src/uipath/platform/pii_detection/__init__.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,36 @@ | ||
| """PiiDetection service package. | ||
|
|
||
| Provides the ``PiiDetectionService`` client, Pydantic request/response models for | ||
| the PII detection endpoint, and utilities for rehydrating masked text with | ||
| original PII values after LLM processing. | ||
| """ | ||
|
|
||
| from ._pii_detection_service import PiiDetectionService | ||
| from .pii_detection import ( | ||
| PiiDetectionRequest, | ||
| PiiDetectionResponse, | ||
| PiiDocument, | ||
| PiiDocumentResult, | ||
| PiiEntity, | ||
| PiiEntityThreshold, | ||
| PiiFile, | ||
| PiiFileResult, | ||
| ) | ||
| from .pii_utilities import ( | ||
| rehydrate_from_pii_entities, | ||
| rehydrate_from_pii_response, | ||
| ) | ||
|
|
||
| __all__ = [ | ||
| "PiiDetectionRequest", | ||
| "PiiDetectionResponse", | ||
| "PiiDetectionService", | ||
| "PiiDocument", | ||
| "PiiDocumentResult", | ||
| "PiiEntity", | ||
| "PiiEntityThreshold", | ||
| "PiiFile", | ||
| "PiiFileResult", | ||
| "rehydrate_from_pii_entities", | ||
| "rehydrate_from_pii_response", | ||
| ] |
80 changes: 80 additions & 0 deletions
80
packages/uipath-platform/src/uipath/platform/pii_detection/_pii_detection_service.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,80 @@ | ||
| """PiiDetection service for UiPath Platform. | ||
|
|
||
| Provides methods for detecting PII in documents and files. | ||
| """ | ||
|
|
||
| from uipath.core.tracing import traced | ||
|
|
||
| from ..common._base_service import BaseService | ||
| from ..common._config import UiPathApiConfig | ||
| from ..common._execution_context import UiPathExecutionContext | ||
| from ..common._models import Endpoint, RequestSpec | ||
| from .pii_detection import PiiDetectionRequest, PiiDetectionResponse | ||
|
|
||
# Route of the PII detection API; requests to it are issued tenant-scoped by
# ``PiiDetectionService``.
_PII_DETECTION_ENDPOINT = Endpoint("llmopstenant_/api/pii-detection")

# Scanning documents/files for PII can take a long time, so each request uses
# this timeout (in seconds) instead of the default httpx client timeout (30s).
_PII_DETECTION_TIMEOUT = 290.0
|
|
||
|
|
||
class PiiDetectionService(BaseService):
    """Client for the UiPath PII detection endpoint.

    Exposes a synchronous and an asynchronous call. Both POST the serialized
    request to ``_PII_DETECTION_ENDPOINT`` (tenant-scoped) with the extended
    ``_PII_DETECTION_TIMEOUT`` and parse the JSON body into a
    ``PiiDetectionResponse``.
    """

    def __init__(
        self,
        config: UiPathApiConfig,
        execution_context: UiPathExecutionContext,
    ) -> None:
        """Initialise the underlying ``BaseService``.

        Args:
            config: API configuration forwarded to ``BaseService``.
            execution_context: Execution context forwarded to ``BaseService``.
        """
        super().__init__(config=config, execution_context=execution_context)

    @traced(name="pii_detection_detect_pii", run_type="uipath")
    def detect_pii(self, request: PiiDetectionRequest) -> PiiDetectionResponse:
        """Run PII detection over the request's documents and/or files.

        Args:
            request: The PII detection request payload.

        Returns:
            The parsed PII detection response.
        """
        request_spec = self._pii_detection_spec(request)
        http_response = self.request(
            request_spec.method,
            url=request_spec.endpoint,
            json=request_spec.json,
            headers=request_spec.headers,
            scoped="tenant",
            timeout=_PII_DETECTION_TIMEOUT,
        )
        payload = http_response.json()
        return PiiDetectionResponse.model_validate(payload)

    @traced(name="pii_detection_detect_pii", run_type="uipath")
    async def detect_pii_async(
        self, request: PiiDetectionRequest
    ) -> PiiDetectionResponse:
        """Run PII detection over the request's documents and/or files (async).

        Args:
            request: The PII detection request payload.

        Returns:
            The parsed PII detection response.
        """
        request_spec = self._pii_detection_spec(request)
        http_response = await self.request_async(
            request_spec.method,
            url=request_spec.endpoint,
            json=request_spec.json,
            headers=request_spec.headers,
            scoped="tenant",
            timeout=_PII_DETECTION_TIMEOUT,
        )
        payload = http_response.json()
        return PiiDetectionResponse.model_validate(payload)

    def _pii_detection_spec(self, request: PiiDetectionRequest) -> RequestSpec:
        """Build the POST request spec shared by the sync and async calls."""
        # Serialize with wire aliases and omit fields that were left as None.
        body = request.model_dump(by_alias=True, exclude_none=True)
        return RequestSpec(
            method="POST",
            endpoint=_PII_DETECTION_ENDPOINT,
            json=body,
        )
91 changes: 91 additions & 0 deletions
91
packages/uipath-platform/src/uipath/platform/pii_detection/pii_detection.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,91 @@ | ||
| """Public Pydantic models for the PiiDetection service.""" | ||
|
|
||
| from typing import Optional | ||
|
|
||
| from pydantic import BaseModel, ConfigDict, Field | ||
|
|
||
|
|
||
class PiiDocument(BaseModel):
    """One text document submitted for PII scanning."""

    # Identifier of the document.
    id: str
    # Role label attached to the document (free-form string in this model).
    role: str
    # The text to scan for PII.
    document: str
|
|
||
|
|
||
class PiiFile(BaseModel):
    """Reference to a file that should be scanned for PII.

    Fields use camelCase aliases; ``populate_by_name`` is enabled on the
    model config.
    """

    model_config = ConfigDict(populate_by_name=True)

    # Name of the file (alias ``fileName``).
    file_name: str = Field(alias="fileName")
    # URL of the file (alias ``fileUrl``).
    file_url: str = Field(alias="fileUrl")
    # Type of the file (alias ``fileType``).
    file_type: str = Field(alias="fileType")
|
|
||
|
|
||
class PiiEntityThreshold(BaseModel):
    """Confidence threshold override for a single PII entity category."""

    model_config = ConfigDict(populate_by_name=True)

    # Entity category the override applies to (alias ``pii-entity-category``).
    category: str = Field(alias="pii-entity-category")
    # Threshold for that category (alias ``pii-entity-confidence-threshold``).
    confidence_threshold: float = Field(alias="pii-entity-confidence-threshold")
|
|
||
|
|
||
class PiiDetectionRequest(BaseModel):
    """Body sent to the PII detection endpoint.

    Every field is optional and defaults to ``None``.
    """

    model_config = ConfigDict(populate_by_name=True)

    # Text documents to scan.
    documents: Optional[list[PiiDocument]] = None
    # File references to scan.
    files: Optional[list[PiiFile]] = None
    # Language code for the request (alias ``languageCode``).
    language_code: Optional[str] = Field(None, alias="languageCode")
    # Request-wide confidence threshold (alias ``confidenceThreshold``).
    confidence_threshold: Optional[float] = Field(None, alias="confidenceThreshold")
    # Per-category threshold overrides (alias ``entityThresholds``).
    entity_thresholds: Optional[list[PiiEntityThreshold]] = Field(
        None, alias="entityThresholds"
    )
|
|
||
|
|
||
class PiiEntity(BaseModel):
    """One PII entity reported by the detection endpoint."""

    model_config = ConfigDict(populate_by_name=True)

    # Original PII value (alias ``piiText``).
    pii_text: str = Field(alias="piiText")
    # Text that stands in for the PII value (alias ``replacementText``).
    replacement_text: str = Field(alias="replacementText")
    # Type of PII detected (alias ``piiType``).
    pii_type: str = Field(alias="piiType")
    # Offset of the entity; presumably a position in the scanned text -- TODO confirm.
    offset: int
    # Confidence score of the detection (alias ``confidenceScore``).
    confidence_score: float = Field(alias="confidenceScore")
|
|
||
|
|
||
class PiiDocumentResult(BaseModel):
    """Detection outcome for one submitted document."""

    model_config = ConfigDict(populate_by_name=True)

    # Identifier of the document.
    id: str
    # Role label of the document.
    role: str
    # Document text after masking (alias ``maskedDocument``).
    masked_document: str = Field(alias="maskedDocument")
    # Document text before masking (alias ``initialDocument``).
    initial_document: str = Field(alias="initialDocument")
    # Entities found in the document; empty list when the key is absent.
    pii_entities: list[PiiEntity] = Field(alias="piiEntities", default_factory=list)
|
|
||
|
|
||
class PiiFileResult(BaseModel):
    """Detection outcome for one submitted file.

    ``file_url`` (``fileUrl`` on the wire) is the URL of the redacted file.
    """

    model_config = ConfigDict(populate_by_name=True)

    # Name of the file (alias ``fileName``).
    file_name: str = Field(alias="fileName")
    # Redacted file URL (alias ``fileUrl``).
    file_url: str = Field(alias="fileUrl")
    # Entities found in the file; empty list when the key is absent.
    pii_entities: list[PiiEntity] = Field(alias="piiEntities", default_factory=list)
|
|
||
|
|
||
class PiiDetectionResponse(BaseModel):
    """Body returned by the PII detection endpoint."""

    model_config = ConfigDict(populate_by_name=True)

    # Per-document results; empty list when the key is absent.
    response: list[PiiDocumentResult] = Field(default_factory=list)
    # Per-file results; empty list when the key is absent.
    files: list[PiiFileResult] = Field(default_factory=list)
98 changes: 98 additions & 0 deletions
98
packages/uipath-platform/src/uipath/platform/pii_detection/pii_utilities.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,98 @@ | ||
| """Utility methods for working with PII data. | ||
|
|
||
| Python port of UiPath.SemanticProxy.Client.PiiUtilities (C#). | ||
| """ | ||
|
|
||
| import re | ||
| from typing import Callable, Iterable | ||
|
|
||
| from .pii_detection import PiiDetectionResponse, PiiEntity | ||
|
|
||
|
|
||
def rehydrate_from_pii_entities(
    masked_text: str, pii_entities: Iterable[PiiEntity]
) -> str:
    """Rehydrate masked text by replacing PII placeholders with original values.

    Placeholders (e.g. ``[Person-1]``) are matched case-insensitively and replaced
    with the corresponding original PII text. Variants without the surrounding
    brackets (e.g. ``Person-1``) are replaced as well, in case the LLM stripped
    the brackets in its output.

    All placeholders are substituted in a single left-to-right pass, so text
    that has already been restored is never rescanned. An original value that
    happens to contain another placeholder's name (``ip-1@host.com`` vs.
    ``[Ip-1]``) is therefore left intact.

    Args:
        masked_text: The masked text with PII placeholders.
        pii_entities: The PII entities containing the original values. Entities
            with an empty ``replacement_text`` or ``pii_text`` are ignored.

    Returns:
        The rehydrated text with original PII values. ``masked_text`` is
        returned unchanged when it is empty or no usable entity is supplied.
    """
    if not masked_text:
        return masked_text

    usable = [e for e in pii_entities if e.replacement_text and e.pii_text]
    # Longest placeholder first (stable), so that when two entities compete for
    # the same spelling the longer/earlier one wins.
    usable.sort(key=lambda e: len(e.replacement_text), reverse=True)

    # Placeholder spelling -> original value; the first registration wins.
    originals: dict[str, str] = {}
    for entity in usable:
        placeholder = entity.replacement_text
        originals.setdefault(placeholder, entity.pii_text)
        if placeholder.startswith("[") and placeholder.endswith("]"):
            bare = placeholder[1:-1]
            # "[]" has an empty interior; an empty pattern would match between
            # every character, so it is skipped.
            if bare:
                originals.setdefault(bare, entity.pii_text)
    if not originals:
        return masked_text

    # Longest alternative first to avoid substring collisions
    # (e.g. "Person-10" must be tried before "Person-1").
    spellings = sorted(originals, key=len, reverse=True)
    values = [originals[spelling] for spelling in spellings]
    # One capture group per spelling: the group that matched identifies which
    # value to insert, independent of the letter case found in the text.
    pattern = re.compile(
        "|".join(f"({re.escape(spelling)})" for spelling in spellings),
        flags=re.IGNORECASE,
    )

    def restore(match: re.Match[str]) -> str:
        # A function replacement is inserted literally, so backslashes or
        # group references inside the PII value are not interpreted.
        return values[(match.lastindex or 1) - 1]

    return pattern.sub(restore, masked_text)
|
|
||
|
|
||
def _literal_replacer(replacement: str) -> Callable[[re.Match[str]], str]:
    """Build a ``re.sub`` callback that always yields ``replacement`` verbatim.

    Passing a callable instead of a string to ``re.sub`` keeps backslashes and
    group references in ``replacement`` from being interpreted.
    """
    return lambda _match: replacement
|
|
||
|
|
||
def rehydrate_from_pii_response(
    masked_text: str, response: PiiDetectionResponse
) -> str:
    """Rehydrate masked text with every PII entity carried by a detection response.

    Entities are gathered from ``response.response`` (document results) first
    and ``response.files`` (file results) second, then handed to
    ``rehydrate_from_pii_entities`` so placeholders from either source are
    restored.

    Args:
        masked_text: The masked text with PII placeholders.
        response: The PII detection response containing entities to rehydrate.

    Returns:
        The rehydrated text with original PII values.
    """
    collected: list[PiiEntity] = [
        entity
        for result in (*response.response, *response.files)
        for entity in result.pii_entities
    ]
    return rehydrate_from_pii_entities(masked_text, collected)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.