Reputation: 291
I have a list like this
['AX95', 'BD95']
I need to expand the list starting from AX95
to BD95
like this
['AX95', 'AY95', 'AZ95', 'BA95','BB95','BC95','BD95']
My current code works fine for single-letter prefixes like
['A95', 'D95']
My code looks like this for now,
import re
def resolve(item):
    """Yield the cell names between two single-letter references.

    ``item`` is a two-element sequence such as ``['A95', 'D95']``.  The
    letters run from the letter of ``item[0]`` to the letter of ``item[1]``
    inclusive, and every yielded name carries the number found in
    ``item[1]``.

    Only one-letter prefixes are supported: ``ord()`` raises ``TypeError``
    when the letter part is longer than one character.
    """
    number_re = re.compile(r'\d+')
    letters_re = re.compile(r'[a-zA-Z]+')

    # The start number is parsed but never used; the lookup is kept so a
    # start reference without digits still fails in the same place.
    int(number_re.search(item[0]).group())
    suffix = str(int(number_re.search(item[1]).group()))
    print(item)

    first_letter = letters_re.search(item[0]).group()
    last_letter = letters_re.search(item[1]).group()
    print(first_letter, last_letter)

    yield from (
        chr(code) + suffix
        for code in range(ord(first_letter), ord(last_letter) + 1)
    )
# Expand the multi-letter range the question asks about and show the result.
xx = resolve(['AX95', 'BD95'])
print([*xx])
How to do this?
Upvotes: 0
Views: 1911
Reputation: 46759
Here is an alternative approach which should also cope with roll over.
It works by first splitting out the letters and numbers from the start and end ranges. The two letter groups are then converted to base 27 numbers. The aim is to then simply count from start to end.
Python's itertools.product
is then used to create a sequence of AA
to ZZ
with an itertools.islice
providing the necessary range.
Why base 27? It is a workaround for the letter equivalent of leading zeros: with A as the zero digit, converting AA
and AAA
to base 26 would give the same value.
import string, itertools, re
def convert_to_range(chars):
    """Return the base-27 value of a letter label such as ``'AX'``.

    Each letter is a digit from 1 (``'A'``) to 26 (``'Z'``); digit 0 is
    deliberately left unused so that ``'A'``, ``'AA'`` and ``'AAA'`` all
    convert to different values.  Letters are folded to uppercase first, so
    ``'ax'`` and ``'AX'`` give the same result.  An empty label gives 0.
    """
    value = 0
    # Horner's scheme, most significant letter first.
    for letter in chars.upper():
        value = value * 27 + (ord(letter) - ord('A') + 1)
    return value
def resolve(item):
    """Yield every cell name from ``item[0]`` to ``item[1]`` inclusive.

    ``item`` holds two references made of letters followed by digits, e.g.
    ``['AX95', 'BD95']``.  The digits of ``item[1]`` are appended to every
    generated letter label.  Labels may grow in length along the way
    (``'Z'`` is followed by ``'AA'``).
    """
    # Raw strings keep ``\d`` a regex escape instead of an invalid string
    # escape.  The capturing group makes split() return the digits too.
    start_split = re.split(r"(\d+)", item[0])
    end_split = re.split(r"(\d+)", item[1])
    trailing = end_split[1]

    start = convert_to_range(start_split[0])
    end = convert_to_range(end_split[0])

    # One 27-symbol alphabet per letter of the end label.  The blank is the
    # zero digit, so position n of the product is the label whose
    # convert_to_range() value is n.
    cols = [' ' + string.ascii_uppercase] * len(end_split[0])
    for x in itertools.islice(itertools.product(*cols), start, end + 1):
        step = "".join(x).lstrip(" ")
        # A blank left after stripping the padding is an embedded zero
        # digit, which is not a real label.
        if ' ' in step:
            continue
        yield step + trailing
# Same-length, growing one-to-two-letter and growing two-to-three-letter ranges.
for cell_range in (['AX95', 'BD95'], ['X95', 'AA95'], ['ZX95', 'AAB95']):
    print(list(resolve(cell_range)))
This would give you:
['AX95', 'AY95', 'AZ95', 'BA95', 'BB95', 'BC95', 'BD95']
['X95', 'Y95', 'Z95', 'AA95']
['ZX95', 'ZY95', 'ZZ95', 'AAA95', 'AAB95']
Upvotes: 1
Reputation: 485
Here you are :)
import re
def _letters_to_index(letters):
    """Return the 1-based position of an uppercase label in A..Z, AA..ZZ, AAA.. order."""
    index = 0
    for letter in letters:
        index = index * 26 + (ord(letter) - ord('A') + 1)
    return index


def _index_to_letters(index):
    """Return the uppercase label at 1-based position *index* (inverse of _letters_to_index)."""
    letters = []
    while index > 0:
        # Subtracting 1 first makes 'Z' a real digit (there is no zero letter).
        index, offset = divmod(index - 1, 26)
        letters.append(chr(ord('A') + offset))
    return ''.join(reversed(letters))


def resolve(item):
    """Yield every cell name from ``item[0]`` to ``item[1]`` inclusive.

    ``item`` holds two references made of letters followed by digits, e.g.
    ``['AX95', 'BD95']``.  The letter prefix may have any length and may grow
    along the way (``'Z'`` is followed by ``'AA'``).  The number of
    ``item[0]`` is appended to every label.  If the start sorts after the
    end, nothing is yielded.  When the start prefix is all lowercase, the
    generated labels are lowercase too.

    The argument and the individual prefix letters are printed before the
    first value is produced.
    """
    print(item)
    num = int(re.search(r'\d+', item[0]).group())
    # Match letters only: ``\w`` would also accept the digits that follow.
    first = re.search(r'[A-Za-z]+', item[0]).group()
    last = re.search(r'[A-Za-z]+', item[1]).group()
    print(*first, *last)

    suffix = str(num)
    lowercase = first.islower()
    begin = _letters_to_index(first.upper())
    end = _letters_to_index(last.upper())
    for index in range(begin, end + 1):
        label = _index_to_letters(index)
        yield (label.lower() if lowercase else label) + suffix
Upvotes: 1
Reputation: 90889
You cannot directly use ord()
on a string of more than one character; it fails with the error -
TypeError: ord() expected a character, but string of length 2 found
Also, it would be very complicated to do this with a for
loop and range()
; I would suggest using a while
loop that keeps incrementing the start characters until they become the end characters.
One way to do this would be to take the last character: if it is Z,
change it to A
and increment the character before it. Otherwise take its ord(),
increment it by 1,
and then get the resulting character using chr()
.
Example algorithm that works for letter prefixes of arbitrary length -
def _next_letters(letters):
    """Return the label that follows *letters* in A..Z, AA..ZZ, AAA.. order.

    The case of each position is kept.  *letters* must not be empty.
    """
    chars = list(letters)
    for pos in range(len(chars) - 1, -1, -1):
        current = chars[pos]
        if current in 'Zz':
            # Roll this position over and carry into the one on its left.
            chars[pos] = 'A' if current == 'Z' else 'a'
        else:
            chars[pos] = chr(ord(current) + 1)
            return ''.join(chars)
    # Every position rolled over (e.g. 'ZZ'), so the label grows by one
    # letter: 'ZZ' -> 'AAA'.
    return chars[0] + ''.join(chars)


def resolve(item):
    """Yield every cell name from ``item[0]`` to ``item[1]`` inclusive.

    ``item`` holds two references made of letters followed by digits, e.g.
    ``['AX95', 'BD95']``.  The letter prefix may have any length and may grow
    along the way (``'Z'`` is followed by ``'AA'``).  The number of
    ``item[0]`` is appended to every label.  Generated labels keep the case
    of the start prefix, and the end is matched case-insensitively.  If the
    start sorts after the end, nothing is yielded.

    Both letter prefixes are printed before the first value is produced.
    """
    start = int(re.search(r'\d+', item[0]).group())
    c = re.search(r'[a-zA-Z]+', item[0]).group()
    d = re.search(r'[a-zA-Z]+', item[1]).group()
    print(c, d)

    suffix = str(start)
    # Shorter labels come before longer ones; equal lengths compare
    # alphabetically.  Bounding the loop this way guarantees termination
    # even when the end can never be hit exactly.
    stop = (len(d), d.upper())
    s = c
    while (len(s), s.upper()) <= stop:
        yield s + suffix
        s = _next_letters(s)
Example/Demo -
>>> def resolve(item):
... start = int(re.search(r'\d+', item[0]).group())
... c = re.search(r'[a-zA-Z]+', item[0]).group()
... d = re.search(r'[a-zA-Z]+', item[1]).group()
... print(c, d)
... s = c
... yield s + str(start)
... while s != d:
... ls = len(s) - 1
... news = ""
... for i in range(ls,-1,-1):
... c = s[i]
... if c.upper() == 'Z':
... news += 'A'
... else:
... news += chr(ord(c) + 1)
... break
... s = s[:i] + news[::-1]
... yield s + str(start)
...
>>>
>>> xx = resolve(['AX95', 'BD95'])
>>>
>>> print(list(xx))
AX BD
['AX95', 'AY95', 'AZ95', 'BA95', 'BB95', 'BC95', 'BD95']
Upvotes: 4