Reputation: 13
I have the following Python code where function print_processed_username contains an if/else construct. The if-line is very long because of the repeating regular expression. If a modification is ever needed in the regular expression, then the same modification must be made in each occurrence of that regular expression (including the call to process_the_username), which makes the code difficult to maintain.
import re
def process_the_username(username):
    """Return 'e' followed by everything after the first character of username."""
    remainder = username[1:]
    return 'e' + remainder
# Print the processed user name when args holds exactly one str in which the
# pattern below matches and the captured value is 7 characters long and starts
# with '_'; otherwise print the "nothing to do" message.
def print_processed_username(args):
# The identical pattern is compiled and searched again in every clause. `and`
# short-circuits, so .groups() is only reached once a match is known to exist.
# The pattern has exactly one capture group, so the `len(...groups()) == 1`
# clause is always true whenever a match was found.
if len(args) == 1 and type(args[0]) is str and re.compile(r'[\'"]username[\'"]: ?[\'"](\S*)[\'"]', re.IGNORECASE).search(args[0]) and len(re.compile(r'[\'"]username[\'"]: ?[\'"](\S*)[\'"]', re.IGNORECASE).search(args[0]).groups()) == 1 and len(re.compile(r'[\'"]username[\'"]: ?[\'"](\S*)[\'"]', re.IGNORECASE).search(args[0]).groups()[0]) == 7 and re.compile(r'[\'"]username[\'"]: ?[\'"](\S*)[\'"]', re.IGNORECASE).search(args[0]).groups()[0][0] == '_':
# Here args is a list containing one item which is a string, and the first 'username': '<user>' match in that string has a <user> that is 7 characters long and starts with _.
# (search() only reports the first match; later occurrences are not inspected.)
print process_the_username(re.compile(r'[\'"]username[\'"]: ?[\'"](\S*)[\'"]', re.IGNORECASE).search(args[0]).groups()[0])
else:
print "Missing or correct user name format. Nothing to do."
If Python, like many other languages, had supported assignment in if-conditions, this problem would have been easy to solve. But the Python version used here (Python 2) doesn't support that; assignment expressions (`:=`) only arrived in Python 3.8.
Thus I'm asking for suggestions on how to write the if-condition the Pythonic way, where the repetition of the regular expression is eliminated. All suggestions that make the code simpler and easier to maintain are highly appreciated.
Here are a few sample executions where the username is processed as expected.
>>> args = ["'location': 'Frankfurt', 'Phone': '+49 123 456789', 'UserName': '_beka01'"]
>>> print_processed_username(args)
ebeka01
>>>
>>> args = ["'UserName': '_beka01', 'location': 'Frankfurt', 'Phone': '+49 123 456789'"]
>>> print_processed_username(args)
ebeka01
>>>
>>> args = ["'UserName': '_beka01'"]
>>> print_processed_username(args)
ebeka01
>>>
>>> args = ["'USERNAME': '_beka01'"]
>>> print_processed_username(args)
ebeka01
>>>
>>> args = ['"location":"Frankfurt", "Phone":"+49 123 456789", "UserName":"_beka01"']
>>> print_processed_username(args)
ebeka01
>>>
>>> args = ['"location":"Frankfurt","Phone":"+49 123 456789","UserName":"_beka01"']
>>> print_processed_username(args)
ebeka01
>>>
Here are a few sample executions where the username is not processed, which is as expected.
>>> args = ["'location': 'Frankfurt', 'Phone': '+49 123 456789', 'UserName': 'abeka01'"]
>>> print_processed_username(args)
Missing or correct user name format. Nothing to do.
>>>
>>> args = ["'location': 'Frankfurt', 'Phone': '+49 123 456789'"]
>>> print_processed_username(args)
Missing or correct user name format. Nothing to do.
>>>
>>> args = ["'UserName': '_beka0132'"]
>>> print_processed_username(args)
Missing or correct user name format. Nothing to do.
>>>
Upvotes: 1
Views: 58
Reputation: 361977
Step 1: Compile the regex once and save it in a variable. It doesn't vary, so do this ahead of time before the method's ever called.
# Compiled once when the module is loaded so the pattern is written in a
# single place. Group 1 captures the quoted, whitespace-free value that
# follows a quoted "username" key (matched case-insensitively).
username_regex = re.compile(
    r'[\'"]username[\'"]: ?[\'"](\S*)[\'"]',
    re.IGNORECASE
)


def print_processed_username(args):
    """Print the processed user name found in ``args``, or a fallback message.

    The user name is processed only when ``args`` holds exactly one ``str``
    in which ``username_regex`` matches and the captured value is 7
    characters long and starts with ``_``.
    """
    holds_one_string = len(args) == 1 and type(args[0]) is str
    # The search is still repeated in every clause at this step;
    # short-circuiting guarantees a match exists before .groups() is reached.
    if not (holds_one_string
            and username_regex.search(args[0])
            and len(username_regex.search(args[0]).groups()) == 1
            and len(username_regex.search(args[0]).groups()[0]) == 7
            and username_regex.search(args[0]).groups()[0][0] == '_'):
        print("Missing or correct user name format. Nothing to do.")
        return
    print(process_the_username(username_regex.search(args[0]).groups()[0]))
Step 2: Eliminate the repeated calls to search().
# Group 1 captures the quoted, whitespace-free value that follows a quoted
# "username" key (matched case-insensitively).
username_regex = re.compile(
    r'[\'"]username[\'"]: ?[\'"](\S*)[\'"]',
    re.IGNORECASE
)


def print_processed_username(args):
    """Print the processed user name found in ``args``, or a fallback message.

    ``args`` must hold exactly one ``str``; the pattern is searched once and
    the match object is reused for every subsequent check.
    """
    rejection = "Missing or correct user name format. Nothing to do."

    if not (len(args) == 1 and type(args[0]) is str):
        print(rejection)
        return

    match = username_regex.search(args[0])
    # `not match` is tested first, so .groups() is never called on None.
    if (not match
            or len(match.groups()) != 1
            or len(match.groups()[0]) != 7
            or match.groups()[0][0] != '_'):
        print(rejection)
        return

    print(process_the_username(match.groups()[0]))
Step 3: Save the user name in a variable.
# Group 1 captures the quoted, whitespace-free value that follows a quoted
# "username" key (matched case-insensitively).
username_regex = re.compile(
    r'[\'"]username[\'"]: ?[\'"](\S*)[\'"]',
    re.IGNORECASE
)


def print_processed_username(args):
    """Print the processed user name found in ``args``, or a fallback message.

    Each validation stage bails out with the fallback message as soon as it
    fails; the captured user name is stored in a local before it is checked.
    """
    failure_text = "Missing or correct user name format. Nothing to do."

    if len(args) != 1 or type(args[0]) is not str:
        print(failure_text)
        return

    found = username_regex.search(args[0])
    if not found or len(found.groups()) != 1:
        print(failure_text)
        return

    # Exactly one group is guaranteed by the check above, so unpacking is safe.
    (username,) = found.groups()
    if len(username) != 7 or not username.startswith('_'):
        print(failure_text)
        return

    print(process_the_username(username))
Step 4: Extract the string parsing from the code that handles the result. Write a parser that purely parses the string and leave the result to the caller.
# Group 1 captures the quoted, whitespace-free value that follows a quoted
# "username" key (matched case-insensitively).
username_regex = re.compile(r'[\'"]username[\'"]: ?[\'"](\S*)[\'"]', re.IGNORECASE)


def parse_username(args):
    """Extract a well-formed user name from ``args``.

    Args:
        args: A sequence expected to hold exactly one string that contains a
            quoted ``username`` key followed by a quoted value.

    Returns:
        The captured user name when it is exactly 7 characters long and
        starts with ``_``; otherwise ``None`` (wrong number of items, a
        non-string item, no match, or a value of the wrong shape).
    """
    # isinstance (rather than an exact type comparison) also accepts str
    # subclasses.
    if len(args) != 1 or not isinstance(args[0], str):
        return None

    match = username_regex.search(args[0])
    if match is None:
        return None

    # The pattern has exactly one capture group, so no group-count check is
    # needed before reading it.
    username = match.group(1)
    if len(username) != 7 or not username.startswith('_'):
        return None
    return username
def print_processed_username(args):
    """Print the processed user name parsed from ``args``, or a fallback message.

    Parsing is delegated to ``parse_username``; a falsy result means there is
    nothing to process.
    """
    parsed = parse_username(args)
    if not parsed:
        print("Missing or correct user name format. Nothing to do.")
        return
    print(process_the_username(parsed))
Upvotes: 1