Skip to content

Commit 1e67988

Browse files
bewithgauravgargsaumyajahnvi480
authored
REFACTOR: Migrate Connection string sanitization from regex to parser-based (microsoft#522)
### Work Item / Issue Reference <!-- IMPORTANT: Please follow the PR template guidelines below. For mssql-python maintainers: Insert your ADO Work Item ID below For external contributors: Insert Github Issue number below Only one reference is required - either GitHub issue OR ADO Work Item. --> <!-- mssql-python maintainers: ADO Work Item --> > [AB#43979](https://sqlclientdrivers.visualstudio.com/c6d89619-62de-46a0-8b46-70b92a84d85e/_workitems/edit/43979) ------------------------------------------------------------------- ### Summary <!-- Insert your summary of changes below. Minimum 10 characters required. --> Replaces the regex-based `sanitize_connection_string()` with a parser-based implementation that uses `_ConnectionStringParser` to correctly handle all ODBC connection string value formats including braced values per ODBC spec. ## Changes - Moved `sanitize_connection_string()` to `connection_string_parser.py` where it naturally belongs alongside the parser it depends on — eliminates circular import between helpers and parser modules - `helpers.py` retains a thin delegate for backward compatibility - `connection.py` imports directly from `connection_string_parser` - Added 5 new test cases covering braced values, escaped braces, and edge cases ## Testing - All existing `TestPasswordSanitization` tests pass - All connection string parser and allowlist tests pass (no regressions) --------- Co-authored-by: gargsaumya <saumyagarg.100@gmail.com> Co-authored-by: Jahnvi Thakkar <61936179+jahnvi480@users.noreply.github.com>
1 parent 803da76 commit 1e67988

4 files changed

Lines changed: 128 additions & 14 deletions

File tree

mssql_python/connection.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,10 @@
2020
import mssql_python
2121
from mssql_python.cursor import Cursor
2222
from mssql_python.helpers import (
23-
sanitize_connection_string,
2423
sanitize_user_input,
2524
validate_attribute_value,
2625
)
26+
from mssql_python.connection_string_parser import sanitize_connection_string
2727
from mssql_python.logging import logger
2828
from mssql_python import ddbc_bindings
2929
from mssql_python.pooling import PoolingManager

mssql_python/connection_string_parser.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
from mssql_python.helpers import sanitize_user_input
2222
from mssql_python.logging import logger
2323

24+
# Connection-string parameter keys whose values sanitize_connection_string()
# replaces with "***" before the string is returned. Entries are lowercase.
_SENSITIVE_KEYS = frozenset({"pwd"})
25+
2426

2527
class _ConnectionStringParser:
2628
"""
@@ -375,3 +377,48 @@ def _parse_braced_value(self, connection_str: str, start_pos: int) -> Tuple[str,
375377

376378
# Reached end without finding closing '}'
377379
raise ValueError(f"Unclosed braced value starting at position {brace_start_pos}")
380+
381+
382+
def sanitize_connection_string(conn_str: str) -> str:
    """
    Sanitize a connection string by masking sensitive values.

    Uses _ConnectionStringParser to correctly handle ODBC braced values
    (e.g. PWD={Top;Secret}) rather than a simple regex, which would truncate
    at the first semicolon and leak the tail of the password.

    A parameter is masked (its value replaced with "***") when either its
    parsed key or the canonical name returned by
    _ConnectionStringParser.normalize_key matches an entry of _SENSITIVE_KEYS,
    compared case-insensitively. The result is rebuilt with
    _ConnectionStringBuilder, using the canonical key name when one is known.

    If parsing or rebuilding fails (e.g. malformed input), the entire string
    is replaced by a fixed placeholder to prevent any partial password leakage.

    Args:
        conn_str (str): The connection string to sanitize.
    Returns:
        str: The sanitized connection string, or the redaction placeholder
            when the input could not be processed.
    """
    # Imported locally rather than at module level -- presumably to avoid an
    # import cycle with the builder module (TODO confirm).
    from mssql_python.connection_string_builder import _ConnectionStringBuilder

    logger.debug(
        "sanitize_connection_string: Sanitizing connection string (length=%d)", len(conn_str)
    )

    try:
        parser = _ConnectionStringParser(validate_keywords=False)
        params = parser._parse(conn_str)

        sanitized_params = {}
        for key, value in params.items():
            canonical = _ConnectionStringParser.normalize_key(key)
            display_key = canonical if canonical else key

            # Check both spellings so a key is masked regardless of its case
            # and regardless of which alias of a sensitive keyword was used.
            candidates = {key.lower()}
            if canonical:
                candidates.add(canonical.lower())

            if candidates.isdisjoint(_SENSITIVE_KEYS):
                sanitized_params[display_key] = value
            else:
                sanitized_params[display_key] = "***"

        builder = _ConnectionStringBuilder(sanitized_params)
        sanitized = builder.build()
    except Exception:
        # Deliberately broad: this function feeds log output, so any failure
        # must fail closed (redact everything) instead of raising or leaking.
        logger.debug("sanitize_connection_string: Failed to parse, redacting entire string")
        sanitized = "<redacted – unparseable connection string>"
    else:
        # Only report masking when it actually happened.
        logger.debug("sanitize_connection_string: Password fields masked")

    return sanitized

mssql_python/helpers.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -41,19 +41,20 @@ def check_error(handle_type: int, handle: Any, ret: int) -> None:
4141
def sanitize_connection_string(conn_str: str) -> str:
4242
"""
4343
Sanitize the connection string by removing sensitive information.
44+
45+
Delegates to the parser-based implementation in connection_string_parser
46+
which correctly handles ODBC braced values (e.g. PWD={Top;Secret}).
47+
4448
Args:
4549
conn_str (str): The connection string to sanitize.
4650
Returns:
4751
str: The sanitized connection string.
4852
"""
49-
logger.debug(
50-
"sanitize_connection_string: Sanitizing connection string (length=%d)", len(conn_str)
53+
from mssql_python.connection_string_parser import (
54+
sanitize_connection_string as _sanitize,
5155
)
52-
# Remove sensitive information from the connection string, Pwd section
53-
# Replace Pwd=...; or Pwd=... (end of string) with Pwd=***;
54-
sanitized = re.sub(r"(Pwd\s*=\s*)[^;]*", r"\1***", conn_str, flags=re.IGNORECASE)
55-
logger.debug("sanitize_connection_string: Password fields masked")
56-
return sanitized
56+
57+
return _sanitize(conn_str)
5758

5859

5960
def sanitize_user_input(user_input: str, max_length: int = 50) -> str:

tests/test_007_logging.py

Lines changed: 72 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -345,20 +345,86 @@ def test_pwd_sanitization(self, cleanup_logger):
345345
assert "secret123" not in sanitized
346346

347347
def test_pwd_case_insensitive(self, cleanup_logger):
348-
"""PWD/Pwd/pwd should all be sanitized (case-insensitive)"""
348+
"""PWD/Pwd/pwd should all be sanitized to canonical PWD=***"""
349349
from mssql_python.helpers import sanitize_connection_string
350350

351351
test_cases = [
352-
("Server=localhost;PWD=secret;Database=test", "PWD=***"),
353-
("Server=localhost;Pwd=secret;Database=test", "Pwd=***"),
354-
("Server=localhost;pwd=secret;Database=test", "pwd=***"),
352+
"Server=localhost;PWD=secret;Database=test",
353+
"Server=localhost;Pwd=secret;Database=test",
354+
"Server=localhost;pwd=secret;Database=test",
355355
]
356356

357-
for conn_str, expected in test_cases:
357+
for conn_str in test_cases:
358358
sanitized = sanitize_connection_string(conn_str)
359-
assert expected in sanitized
359+
assert "PWD=***" in sanitized
360360
assert "secret" not in sanitized
361361

362+
def test_pwd_braced_value_with_semicolon(self, cleanup_logger):
    """A braced PWD value that contains a semicolon is masked in full."""
    from mssql_python.helpers import sanitize_connection_string

    masked = sanitize_connection_string("Server=localhost;PWD={Top;Secret};Database=test")

    assert "PWD=***" in masked
    # Neither half of the password (split at the embedded ';') may survive.
    for leaked in ("Top", "Secret"):
        assert leaked not in masked
372+
373+
def test_pwd_braced_value_with_escaped_braces(self, cleanup_logger):
    """PWD with escaped braces inside a braced value must be fully masked.

    Checks both the unescaped password and the raw escaped text exactly as it
    appears in the input, so a leak in either form is detected.
    """
    from mssql_python.helpers import sanitize_connection_string

    conn_str = "Server=localhost;PWD={p}}w{{d};Database=test"
    sanitized = sanitize_connection_string(conn_str)

    assert "PWD=***" in sanitized
    # Unescaped form of the password.
    assert "p}w{d" not in sanitized
    # Raw escaped form from the input. The assertion above cannot catch this
    # one, because "p}}w{{d" does not contain the substring "p}w{d".
    assert "p}}w{{d" not in sanitized
382+
383+
def test_pwd_braced_value_multiple_semicolons(self, cleanup_logger):
    """A braced PWD value holding several semicolons is masked in full."""
    from mssql_python.helpers import sanitize_connection_string

    masked = sanitize_connection_string("Server=localhost;PWD={a;b;c;d};Database=test")

    assert "PWD=***" in masked
    # Collect any piece of the password that made it into the output.
    leaked = [piece for piece in ("a;b;c;d", "{a;", "b;c", "c;d}") if piece in masked]
    assert not leaked
393+
394+
def test_pwd_at_end_of_string(self, cleanup_logger):
    """A trailing PWD entry with no closing semicolon is still masked."""
    from mssql_python.helpers import sanitize_connection_string

    masked = sanitize_connection_string("Server=localhost;Database=test;PWD=secret")

    assert "secret" not in masked
    assert "PWD=***" in masked
403+
404+
def test_no_pwd_preserves_non_sensitive_fields(self, cleanup_logger):
    """Without a PWD entry, non-sensitive key/value pairs survive sanitization."""
    from mssql_python.helpers import sanitize_connection_string

    masked = sanitize_connection_string("Server=localhost;Database=test;UID=user")

    # Output may be reformatted, so check each pair rather than the whole string.
    for pair in ("Server=localhost", "Database=test", "UID=user"):
        assert pair in masked

    # No mask was invented and the fail-closed placeholder was not used.
    assert "PWD=***" not in masked
    assert "redacted" not in masked.lower()
416+
417+
def test_malformed_string_fully_redacted(self, cleanup_logger):
    """An unparseable connection string is replaced wholesale, leaking nothing."""
    from mssql_python.helpers import sanitize_connection_string

    masked = sanitize_connection_string("PWD={unclosed")

    assert "redacted" in masked.lower()
    # Neither the partial password nor the key name may appear in the output.
    for leaked in ("unclosed", "PWD"):
        assert leaked not in masked
427+
362428
def test_explicit_sanitization_in_logging(self, cleanup_logger):
363429
"""Verify that explicit sanitization works when logging"""
364430
from mssql_python.helpers import sanitize_connection_string

0 commit comments

Comments
 (0)