Reputation: 2299
I am trying to figure out a way to insert a character into a string if that character doesn't match.
Let's say I have these two strings:
s1: CGGGTATCCAA
s2: CCCTAGGTCCCA
It should output this:
s1: C----GGGTATCC-AA
s2: CCCTAGG-T--CCCA-
The algorithm is as follows:
if(lengthOfs1 > lengthOfs2)
if character mismatch
put a dash on s2
else
put the original character
else if(lengthOfs1 <= lengthOfs2)
if character mistmatch
put a dash on s1
else
put the original character
I've tried to accomplish this by having the two original strings, and looping through a for loop until I hit '\0' in a string. Then I do the compares, and finally use something like:
strncpy(&s1_final_string[i + 1], &s1[i], 1) // if they are equal
strncpy(&s1_final_string[i], "-", 1); // if I need to put a dash
Is there a simple way to approach this situation and copy over a '-' character if we have a mismatch?
Upvotes: 1
Views: 49
Reputation: 2899
Here's a greedy difference algorithm that matches your input and output. Please note that this algorithm does not find a minimal mismatch between any two strings. Instead, at each mismatch point it scans ahead in both strings to find the next matching points and uses whichever one is closer.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int greedy_diff_str(const char *s1, const char *s2, char **s1_final_ptr, char **s2_final_ptr)
{
size_t s1_len = strlen(s1);
size_t s2_len = strlen(s2);
size_t s1_index = 0, s2_index = 0, final_index = 0;
char *s1_final, *s2_final;
if (NULL == (s1_final = *s1_final_ptr = (char*) calloc(s1_len + s2_len + 1, 1)))
{
*s2_final_ptr = NULL;
return -1;
}
if (NULL == (s2_final = *s2_final_ptr = (char*) calloc(s1_len + s2_len + 1, 1)))
{
free(s1_final);
*s1_final_ptr = NULL;
return -1;
}
while ('\0' != s1[s1_index] && '\0' != s2[s2_index])
{
if (s1[s1_index] == s2[s2_index])
{
s1_final[final_index] = s1[s1_index++];
s2_final[final_index++] = s2[s2_index++];
//printf("s1: '%s'\ns2: '%s'\n", s1_final, s2_final);
}
else
{
size_t s1_dashes, s2_dashes, i;
/* count how many dashes we'd have to add to s1 to reach next match point with s2 */
for (i = s2_index + 1; '\0' != s2[i] && s1[s1_index] != s2[i]; ++i);
s1_dashes = i - s2_index;
/* count how many dashes we'd have to add to s2 to reach next match point with s1 */
for (i = s1_index + 1; '\0' != s1[i] && s2[s2_index] != s1[i]; ++i);
s2_dashes = i - s1_index;
//printf("mismatch at s1[%lu] = '%c'; s2[%lu] = '%c'; s1_dashes = %lu; s2_dashes = %lu\n", s1_index, s1[s1_index], s2_index, s2[s2_index], s1_dashes, s2_dashes);
/* pick whichever path results in less dashes; break ties by adding dashes to string from which we've consumed more */
if (s1_dashes < s2_dashes || (s1_dashes == s2_dashes && s1_index >= s2_index))
{
while (s1_dashes--)
{
s1_final[final_index] = '-';
s2_final[final_index++] = s2[s2_index++];
}
}
else
{
while (s2_dashes--)
{
s1_final[final_index] = s1[s1_index++];
s2_final[final_index++] = '-';
}
}
//printf("s1: '%s'\ns2: '%s'\n", s1_final, s2_final);
}
}
for (; '\0' != s1[s1_index]; ++s1_index, ++final_index)
{
s1_final[final_index] = s1[s1_index];
s2_final[final_index] = '-';
//printf("s1: '%s'\ns2: '%s'\n", s1_final, s2_final);
}
for (; '\0' != s2[s2_index]; ++s2_index, ++final_index)
{
s1_final[final_index] = '-';
s2_final[final_index] = s2[s2_index];
//printf("s1: '%s'\ns2: '%s'\n", s1_final, s2_final);
}
s1_final[final_index] = '\0';
s2_final[final_index] = '\0';
return 0;
}
int main()
{
char s1[] = "CGGGTATCCAA", s2[] = "CCCTAGGTCCCA", *s1_fin, *s2_fin;
printf("Input:\n");
printf("s1: '%s'\n", s1);
printf("s2: '%s'\n", s2);
greedy_diff_str(s1, s2, &s1_fin, &s2_fin);
printf("Output:\n");
printf("s1: '%s'\n", s1_fin);
printf("s2: '%s'\n", s2_fin);
return 0;
}
Here's the output of a run:
john-schultzs-macbook-pro:~ jschultz$ ./a.out
Input:
s1: 'CGGGTATCCAA'
s2: 'CCCTAGGTCCCA'
Output:
s1: 'C----GGGTATCC-AA'
s2: 'CCCTAGG-T--CCCA-'
Upvotes: 1