Skip to content

Commit cc54d92

Browse files
authored
[feat]: include / in merge_& function (#34)
- Added corresponding unit test
1 parent 9383396 commit cc54d92

File tree

2 files changed

+3
-3
lines changed

2 files changed

+3
-3
lines changed

emm/preprocessing/functions.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,9 +74,9 @@ def map_shorthands(name):
7474
"remove_extra_space": F.regex_replace(r"""\s+""", " ", simple=True),
7575
# Map all the shorthands to the same format (stichting => stg)
7676
"map_shorthands": map_shorthands,
77-
# Merge & separated abbreviations by removing & and the spaces between them
77+
# Merge single-character abbreviations separated by & or / by removing the separator and any spaces around it (e.g. C&D => CD, S/A => SA)
7878
"merge_&": F.regex_replace(
79-
r"(\s|^)(\w)\s*&\s*(\w)(\s|$)", r"$1$2$3$4" if use_spark else r"\1\2\3\4", simple=True
79+
r"(\s|^)(\w)\s*[&/]\s*(\w)(\s|$)", r"$1$2$3$4" if use_spark else r"\1\2\3\4", simple=True
8080
),
8181
# remove legal form
8282
"remove_legal_form": F.run_custom_function(

tests/unit/test_name_preprocessing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@
5252
["Tzu-Sun_BV.a;b,c_ä", "ąćęłńóśźżĄĆĘŁŃÓŚŹŻ", "Café"],
5353
["Tzu-Sun_BV.a;b,c_a", "acelnoszzACELNOSZZ", "Cafe"],
5454
),
55-
("merge_&", ["xyz & abc C&D"], ["xyz & abc CD"]),
55+
("merge_&", ["xyz & abc C&D", "foobar S/A"], ["xyz & abc CD", "foobar SA"]),
5656
(
5757
"preprocess_name",
5858
["Tzu-Sun_BV.a;b,c_ä", "Tzu-Sun_BV morethan1space"],

0 commit comments

Comments
 (0)