Skip to content

Commit 6703d99

Browse files
committed
support ignoring characters
1 parent 632af82 commit 6703d99

2 files changed

Lines changed: 25 additions & 5 deletions

File tree

tests/test_unidecode.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,22 @@ def test_ascii(self):
6868

6969
wlog.stop()
7070

71+
72+
def test_ignore(self):
73+
74+
wlog = WarningLogger()
75+
wlog.start("should be ignored")
76+
77+
t = 'æøå'
78+
r = self.unidecode(t, ignore='æøå')
79+
self.assertEqual(r, t)
80+
self.assertEqual(type(r), str)
81+
82+
# unicode objects shouldn't raise warnings
83+
self.assertEqual(0, len(wlog.log))
84+
85+
wlog.stop()
86+
7187
def test_bmp(self):
7288
for n in range(0,0x10000):
7389
# skip over surrogate pairs, which throw a warning

unidecode/__init__.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def _warn_if_not_unicode(string):
2828
RuntimeWarning, 2)
2929

3030

31-
def unidecode_expect_ascii(string):
31+
def unidecode_expect_ascii(string, ignore=''):
3232
"""Transliterate a Unicode object into an ASCII string
3333
3434
>>> unidecode(u"\u5317\u4EB0")
@@ -47,30 +47,34 @@ def unidecode_expect_ascii(string):
4747
try:
4848
bytestring = string.encode('ASCII')
4949
except UnicodeEncodeError:
50-
return _unidecode(string)
50+
return _unidecode(string, ignore)
5151
if version_info[0] >= 3:
5252
return string
5353
else:
5454
return bytestring
5555

56-
def unidecode_expect_nonascii(string):
56+
def unidecode_expect_nonascii(string, ignore=''):
5757
"""Transliterate a Unicode object into an ASCII string
5858
5959
>>> unidecode(u"\u5317\u4EB0")
6060
'Bei Jing '
6161
"""
6262

6363
_warn_if_not_unicode(string)
64-
return _unidecode(string)
64+
return _unidecode(string, ignore)
6565

6666
unidecode = unidecode_expect_ascii
6767

68-
def _unidecode(string):
68+
def _unidecode(string, ignore=''):
6969
retval = []
7070

7171
for char in string:
7272
codepoint = ord(char)
7373

74+
if char in ignore:
75+
retval.append(str(char))
76+
continue
77+
7478
if codepoint < 0x80: # Basic ASCII
7579
retval.append(str(char))
7680
continue

0 commit comments

Comments
 (0)