Create a function to use elimination logic to split a list of strings into substrings

Question

I have a list of strings where each string represents the names of two tennis players that played in a match. The list of strings collectively is all known matches in a tournament. An example of a string would be:

"halep-simona-williams-serena"

I'd like to split each string into a tuple of two strings:

("halep-simona", "williams-serena")

Given one string then this clearly isn't possible but given a list of strings that represent a tournament of matches where players have played each other then I'm hopeful there's a way of using elimination logic to split the names.

I've had a go at this with some success:

def split_into_names(urls: list[str]) -> list[tuple[str | None, str | None]]:
    matchups = list()
    for idx, url in enumerate(urls):
        other_urls = urls[:idx] + urls[idx + 1 :]
        full_name_combo = get_name_combo(
            url=url,
            other_urls=other_urls,
        )
        matchups.append(full_name_combo)

    return matchups


def get_name_combo(url: str, other_urls: list[str]) -> tuple[str | None, str | None]:
    full_name_combos = generate_potential_name_combos(url)
    for full_name_combo in full_name_combos:
        if check_name_combo(
            full_name_combo=full_name_combo,
            url=url,
            other_urls=other_urls,
        ):
            return full_name_combo

    else:
        return (None, None)


def generate_potential_name_combos(url: str) -> list[tuple[str, str]]:
    parts = url.split("-")
    full_names = list()
    for i in range(1, len(parts)):
        p1 = "-".join(parts[:i])
        p2 = "-".join(parts[i:])
        full_names.append((p1, p2))

    return full_names


def check_name_combo(
    full_name_combo: tuple[str, str],
    url: str,
    other_urls: list[str],
) -> bool:
    for full_name in full_name_combo:
        for idx, check_url in enumerate(other_urls):
            # can't eliminate url name options if both players have
            # played each other previously in the same order
            if check_url == url:
                continue
            check_url_part = check_url.partition(full_name)
            # can't check url if name not present
            if not check_url_part[1]:
                continue
            # can't be correct if full name splits the url into three
            elif check_url_part[0] and check_url_part[2]:
                continue
            # this is the correct split so need to exit here with the answer
            elif check_url_part[0] and not check_url_part[2]:
                return True
            elif not check_url_part[0] and check_url_part[2]:
                clean_potential_name = check_url_part[2].strip("-")
                remaining_urls = other_urls[:idx] + other_urls[idx + 1 :]
                for remaining_url in remaining_urls:
                    if remaining_url == check_url:
                        continue
                    elif clean_potential_name in remaining_url:
                        return True
            else:
                pass

    return False

If I run the above I'm able to match a list of real life sample strings. However, if I start to build some more complex edge cases then I run into trouble. Here is the code to test the real life and edge case lists:

def main():
    names = [
        "halep-simona-williams-serena",
        "radwanska-agnieszka-halep-simona",
        "williams-serena-wozniacki-caroline",
        "halep-simona-ivanovic-ana",
        "kvitova-petra-wozniacki-caroline",
        "sharapova-maria-radwanska-agnieszka",
        "williams-serena-bouchard-eugenie",
        "sharapova-maria-kvitova-petra",
        "radwanska-agnieszka-wozniacki-caroline",
        "bouchard-eugenie-ivanovic-ana",
        "williams-serena-halep-simona",
        "kvitova-petra-radwanska-agnieszka",
        "sharapova-maria-wozniacki-caroline",
        "halep-simona-bouchard-eugenie",
        "williams-serena-ivanovic-ana",
    ]

    result = split_into_names(names)

    # success :-)
    assert result == [
        ("halep-simona", "williams-serena"),
        ("radwanska-agnieszka", "halep-simona"),
        ("williams-serena", "wozniacki-caroline"),
        ("halep-simona", "ivanovic-ana"),
        ("kvitova-petra", "wozniacki-caroline"),
        ("sharapova-maria", "radwanska-agnieszka"),
        ("williams-serena", "bouchard-eugenie"),
        ("sharapova-maria", "kvitova-petra"),
        ("radwanska-agnieszka", "wozniacki-caroline"),
        ("bouchard-eugenie", "ivanovic-ana"),
        ("williams-serena", "halep-simona"),
        ("kvitova-petra", "radwanska-agnieszka"),
        ("sharapova-maria", "wozniacki-caroline"),
        ("halep-simona", "bouchard-eugenie"),
        ("williams-serena", "ivanovic-ana"),
    ]

    names = [
        "test-test-test-test-test", # I don't think this can ever be split...
        "test-test-test-test-test-phil",
        "test-test-test-test-matt",
        "test-test-test-test-test-james",
        "test-test-kris-test-test",
        "test-test-bob-test-test-phil",
        "test-test-rich-test-test-matt",
    ]

    result = split_into_names(names)

    # fail :-(
    assert result == [
        ("test-test", "test-test-test"),
        ("test-test-test", "test-test-phil"),
        ("test-test", "test-test-matt"),
        ("test-test-test", "test-test-james"),
        ("test-test-kris", "test-test"),
        ("test-test-bob", "test-test-phil"),
        ("test-test-rich", "test-test-matt"),
    ]


if __name__ == "__main__":
    main()

Important: I don't need a less complex solution for the real life cases. I need a solution for the edge cases.

Is what I'm trying to achieve feasible given the high number of permutations of names? If yes, then how would I adapt my code so that the edge cases pass?

If it helps then I'm ok if the answer involves returning (None, None) for combinations that could never be matched (I have identified one edge case already where this is almost certainly needed). However, I can't have any returned tuples that are incorrect.

Create a function to use elimination logic to split a list of strings into substrings

Answers (1)

Related Questions