amaranaitsaidi
amaranaitsaidi

Reputation: 101

increment a number at the end of a string using regex Python

I would like to add a regex to this code that detects when a reference ends with -FT(NUMBER), for example CHB-16236-FT-FT045, and increments that number so that the reference becomes CHB-16236-FT-FT046.

import re

# Pole references: one or more "POT"/"PHT" prefixes, a run of hyphens, exactly
# five digits, a run of hyphens, two letters, a run of hyphens, then digits up
# to the end of the string.
pattern_poteaux = r"(POT|PHT)+[-]+[0-9]{5}[-]+[a-zA-Z]{2}[-]+\d+$"
# Chamber references: "CH" followed by one or more "B", a run of hyphens,
# digits, a run of hyphens, two letters, a run of hyphens, then digits up to
# the end of the string.
# NOTE(review): digits are required immediately after the last hyphen run, so a
# reference such as 'CHB-16236-FT-FT045' (letters before the final number) does
# not match this pattern.
pattern_chambre = r"CHB+[-]+[0-9]+[-]+[a-zA-Z]{2}[-]+\d+$"

# References that passed validation in increment_ref (stored before incrementing).
old_references = []
# Not written to by any code visible in this snippet.
new_references = []
# References that failed validation in increment_ref.
invalid_references = []


def attribute_check(pattern, sample_str):
    """
        Tell whether a value matches a regex pattern.

        @param: regex pattern, sample value (converted to str before searching)
        return : True if the pattern is found anywhere in the string, False if not.
    """
    return re.search(pattern, str(sample_str)) is not None


def increment_ref(pattern, sample_str):
    """
    Increment the number at the end of a reference by one.

    @param: regex pattern the reference must match, reference to increment
    return : the reference with its trailing digits incremented and their
             zero-padded width preserved (e.g. '...FT045' -> '...FT046'),
             or None when the reference does not match the pattern.

    Accepted references are appended to old_references (before incrementing);
    rejected ones are appended to invalid_references.
    """
    if not attribute_check(pattern, sample_str):
        invalid_references.append(sample_str)
        return None

    old_references.append(sample_str)

    def bump(match):
        digits = match.group()
        # zfill keeps the original width: '045' -> '046', '009' -> '010'.
        return str(int(digits) + 1).zfill(len(digits))

    # Match only the trailing digit run. The previous pattern '[^-]+[0-9]$'
    # captured the whole last segment (e.g. 'FT045'), which made int() raise
    # ValueError, and it skipped references ending in a single digit.
    return re.sub(r'[0-9]+$', bump, str(sample_str))


if __name__ == "__main__":
    # Sample chamber reference whose last segment is "FT" followed by digits.
    reference_chamber = 'CHB-16236-FT-FT045'
    # TODO increment reference with FT001 at the end
    # NOTE: the return value is discarded, so running the script prints nothing.
    increment_ref(pattern_chambre, reference_chamber)

Upvotes: 0

Views: 226

Answers (3)

MonkeyZeus
MonkeyZeus

Reputation: 20737

You need to make use of a replacement callback:

# coding=utf8
import re

# Regex to find the digits in "-FT{digits}" at the end of the string (or at the
# end of each line when used with re.MULTILINE). The lookbehind keeps "-FT" out
# of the match, so a substitution rewrites only the digits (captured as group 1).
regex = r"(?<=-FT)(\d+)$"

# Increment the digit by one and maintain left pad zeros
def subst(m):
    """Return group 1 of match ``m`` plus one, zero-padded to the group's original width."""
    digits = m.group(1)
    width = len(digits)
    return f"{int(digits) + 1:0{width}}"

# Test your data
samples = (
    "CHB-16236-FT-FT005",
    "CHB-16236-FT-FT009",
    "CHB-16236-FT-FT045",
    "CHB-16236-FT-FT145",
    "CHB-16236-FT-FT999",
    "CHB-16236-FT-FT0999",
    "CHB-16236-FT-FT2009",
    "CHB-16236-FT-FT09998",
)
for sample in samples:
    # count and flags are passed by keyword: passing them positionally to
    # re.sub is deprecated since Python 3.13. count=0 replaces every match.
    print(re.sub(regex, subst, sample, count=0, flags=re.MULTILINE))

Results:

CHB-16236-FT-FT006
CHB-16236-FT-FT010
CHB-16236-FT-FT046
CHB-16236-FT-FT146
CHB-16236-FT-FT1000
CHB-16236-FT-FT1000
CHB-16236-FT-FT2010
CHB-16236-FT-FT09999

Upvotes: 1

Avinash
Avinash

Reputation: 875

Try this. Check #Modified comments to see the modifications from your original code:

import re

# Pole references: one or more "POT"/"PHT" prefixes, a run of hyphens, exactly
# five digits, a run of hyphens, two letters, a run of hyphens, then digits up
# to the end of the string.
pattern_poteaux = r"(POT|PHT)+[-]+[0-9]{5}[-]+[a-zA-Z]{2}[-]+\d+$"
# Chamber references, split into groups for substitution:
#   group 1: "CHB-", five digits, any run of the characters '-', 'F', 'T'
#            (a character class, not the literal "-FT"), and any leading zeros
#   group 2: the leading zeros on their own
#   group 3: the following run of digits 1-9
# NOTE(review): group 3 excludes '0', so it stops at the first zero after a
# nonzero digit (for '...FT100' it captures only '1') and is empty when the
# number is all zeros. The pattern is also not anchored to the end of the string.
pattern_chambre = r"(CHB-\d{5}[-FT]*(0*)?)([1-9]*)" #Modified

# References that passed validation in increment_ref (stored before incrementing).
old_references = []
# Not written to by any code visible in this snippet.
new_references = []
# References that failed validation in increment_ref.
invalid_references = []


def attribute_check(pattern, sample_str):
    """
        Check a value against a regex pattern.

        @param: regex pattern, sample value (stringified before the search)
        return : True when re.search finds the pattern in the string, False otherwise.
    """
    found = re.search(pattern, str(sample_str))
    # A match object is always truthy; a failed search returns None.
    return bool(found)


def increment_ref(pattern, sample_str):
    """
    Increment the number at the end of a reference by one.

    @param: regex pattern used to validate the reference, reference to increment
    return : the reference with its trailing digits incremented and their
             zero-padded width preserved (e.g. '...FT045' -> '...FT046',
             '...FT009' -> '...FT010'), or None when the reference does not
             match the pattern.

    Accepted references are appended to old_references (before incrementing);
    rejected ones are appended to invalid_references.
    """
    if not attribute_check(pattern, sample_str):
        invalid_references.append(sample_str)
        return None

    old_references.append(sample_str)

    def bump(match):
        digits = match.group()
        # Pad back to the original width so leading zeros survive a carry.
        return str(int(digits) + 1).zfill(len(digits))

    # Substitute the trailing digit run directly, rather than relying on the
    # capture groups of pattern_chambre. That keeps the function correct for
    # whichever validation pattern is passed in, and handles numbers with
    # embedded or trailing zeros ('FT100', 'FT000') that a '[1-9]*' group
    # cannot capture.
    return re.sub(r'[0-9]+$', bump, str(sample_str))


if __name__ == "__main__":
    reference_chamber = 'CHB-16236-FT-FT045'
    # TODO increment reference with FT001 at the end
    # Print the incremented form of each sample reference, in order.
    for sample in (
        reference_chamber,
        'CHB-16236-FT-FT123',
        'CHB-16236-FT-FT01234',
        'CHB-16236-FT-FT00012',
    ):
        print(increment_ref(pattern_chambre, sample))

Output:

CHB-16236-FT-FT046
CHB-16236-FT-FT124
CHB-16236-FT-FT01235
CHB-16236-FT-FT00013

Upvotes: 1

Scott Hunter
Scott Hunter

Reputation: 49803

Your pattern

  1. allows for runs of -s, which may not be correct
  2. does not try to match the characters that appear between the last - and the final number

Upvotes: 1

Related Questions