Reputation: 71424
I'm trying to diff two strings to determine whether or not they solely vary in one numerical subset of the string structure; for example,
varies_in_single_number_field('foo7bar', 'foo123bar')
# Returns True, because 7 != 123, and there's only one varying
# number region between the two strings.
In Python I can use the difflib
to accomplish this:
import difflib, doctest
def varies_in_single_number_field(str1, str2):
"""
A typical use case is as follows:
>>> varies_in_single_number_field('foo7bar00', 'foo123bar00')
True
Numerical variation in two dimensions is no good:
>>> varies_in_single_number_field('foo7bar00', 'foo123bar01')
False
Varying in a nonexistent field is okay:
>>> varies_in_single_number_field('foobar00', 'foo123bar00')
True
Identical strings don't *vary* in any number field:
>>> varies_in_single_number_field('foobar00', 'foobar00')
False
"""
in_differing_substring = False
passed_differing_substring = False # There should be only one.
differ = difflib.Differ()
for letter_diff in differ.compare(str1, str2):
letter = letter_diff[2:]
if letter_diff.startswith(('-', '+')):
if passed_differing_substring: # Already saw a varying field.
return False
in_differing_substring = True
if not letter.isdigit(): return False # Non-digit diff character.
elif in_differing_substring: # Diff character not found - end of diff.
in_differing_substring = False
passed_differing_substring = True
return passed_differing_substring # No variation if no diff was passed.
if __name__ == '__main__': doctest.testmod()
But I have no idea how to find something like difflib
for C++. Alternative approaches welcome. :)
Upvotes: 4
Views: 2949
Reputation: 90422
This might work, it at least passes your demonstration test: EDIT: I've made some modifications to deal with some string indexing issues. I believe it should be good now.
#include <iostream>
#include <string>
#include <vector>
#include <algorithm>
#include <cctype>
bool starts_with(const std::string &s1, const std::string &s2) {
return (s1.length() <= s2.length()) && (s2.substr(0, s1.length()) == s1);
}
bool ends_with(const std::string &s1, const std::string &s2) {
return (s1.length() <= s2.length()) && (s2.substr(s2.length() - s1.length()) == s1);
}
bool is_numeric(const std::string &s) {
for(std::string::const_iterator it = s.begin(); it != s.end(); ++it) {
if(!std::isdigit(*it)) {
return false;
}
}
return true;
}
bool varies_in_single_number_field(std::string s1, std::string s2) {
size_t index1 = 0;
size_t index2 = s1.length() - 1;
if(s1 == s2) {
return false;
}
if((s1.empty() && is_numeric(s2)) || (s2.empty() && is_numeric(s1))) {
return true;
}
if(s1.length() < s2.length()) {
s1.swap(s2);
}
while(index1 < s1.length() && starts_with(s1.substr(0, index1), s2)) { index1++; }
while(ends_with(s1.substr(index2), s2)) { index2--; }
return is_numeric(s1.substr(index1 - 1, (index2 + 1) - (index1 - 1)));
}
int main() {
std::cout << std::boolalpha << varies_in_single_number_field("foo7bar00", "foo123bar00") << std::endl;
std::cout << std::boolalpha << varies_in_single_number_field("foo7bar00", "foo123bar01") << std::endl;
std::cout << std::boolalpha << varies_in_single_number_field("foobar00", "foo123bar00") << std::endl;
std::cout << std::boolalpha << varies_in_single_number_field("foobar00", "foobar00") << std::endl;
std::cout << std::boolalpha << varies_in_single_number_field("7aaa", "aaa") << std::endl;
std::cout << std::boolalpha << varies_in_single_number_field("aaa7", "aaa") << std::endl;
std::cout << std::boolalpha << varies_in_single_number_field("aaa", "7aaa") << std::endl;
std::cout << std::boolalpha << varies_in_single_number_field("aaa", "aaa7") << std::endl;
}
Basically, it looks for a string which has 3 parts, string2 begins with part1, string2 ends with part3 and part2 is only digits.
Upvotes: 2
Reputation: 71424
@Evan Teran: looks like we did this in parallel -- I have a markedly less readable O(n) implementation:
#include <cassert>
#include <cctype>
#include <string>
#include <sstream>
#include <iostream>
using namespace std;
ostringstream debug;
const bool DEBUG = true;
bool varies_in_single_number_field(const string &str1, const string &str2) {
bool in_difference = false;
bool passed_difference = false;
string str1_digits, str2_digits;
size_t str1_iter = 0, str2_iter = 0;
while (str1_iter < str1.size() && str2_iter < str2.size()) {
const char &str1_char = str1.at(str1_iter);
const char &str2_char = str2.at(str2_iter);
debug << "str1: " << str1_char << "; str2: " << str2_char << endl;
if (str1_char == str2_char) {
if (in_difference) {
in_difference = false;
passed_difference = true;
}
++str1_iter, ++str2_iter;
continue;
}
in_difference = true;
if (passed_difference) { /* Already passed a difference. */
debug << "Already passed a difference." << endl;
return false;
}
bool str1_char_is_digit = isdigit(str1_char);
bool str2_char_is_digit = isdigit(str2_char);
if (str1_char_is_digit && !str2_char_is_digit) {
++str1_iter;
str1_digits.push_back(str1_char);
} else if (!str1_char_is_digit && str2_char_is_digit) {
++str2_iter;
str2_digits.push_back(str2_char);
} else if (str1_char_is_digit && str2_char_is_digit) {
++str1_iter, ++str2_iter;
str1_digits.push_back(str1_char);
str2_digits.push_back(str2_char);
} else { /* Both are non-digits and they're different. */
return false;
}
}
if (in_difference) {
in_difference = false;
passed_difference = true;
}
string str1_remainder = str1.substr(str1_iter);
string str2_remainder = str2.substr(str2_iter);
debug << "Got to exit point; passed difference: " << passed_difference
<< "; str1 digits: " << str1_digits
<< "; str2 digits: " << str2_digits
<< "; str1 remainder: " << str1_remainder
<< "; str2 remainder: " << str2_remainder
<< endl;
return passed_difference
&& (str1_digits != str2_digits)
&& (str1_remainder == str2_remainder);
}
int main() {
assert(varies_in_single_number_field("foo7bar00", "foo123bar00") == true);
assert(varies_in_single_number_field("foo7bar00", "foo123bar01") == false);
assert(varies_in_single_number_field("foobar00", "foo123bar00") == true);
assert(varies_in_single_number_field("foobar00", "foobar00") == false);
assert(varies_in_single_number_field("foobar00", "foobaz00") == false);
assert(varies_in_single_number_field("foo00bar", "foo01barz") == false);
assert(varies_in_single_number_field("foo01barz", "foo00bar") == false);
if (DEBUG) {
cout << debug.str();
}
return 0;
}
Upvotes: 0
Reputation: 3200
How about using something like boost::regex?
// pseudo code, may or may not compile bool match_except_numbers(const std::string& s1, const std::string& s2) { static const boost::regex fooNumberBar("foo\\d+bar"); return boost::match(s1, fooNumberBar) && boost::match(s2, fooNumberBar); }
Upvotes: 0
Reputation: 34034
You could do an ad hoc approach: You're looking to match strings s and s', where s=abc and s'=ab'c, and the b and b' should be two distinct numbers (possible empty). So:
Upvotes: 1
Reputation: 21747
It's probably a bit of overkill, but you could use boost to interface to python. At the worst, difflib is implemented in pure python, and it's not too long. It should be possible to port from python to C...
Upvotes: 1