Alex
Alex

Reputation: 2299

Inserting a character in a string when there isn't a match in C

I am trying to figure out a way to insert a character into a string if that character doesn't match.

Let's say I have these two strings:

s1: CGGGTATCCAA
s2: CCCTAGGTCCCA

It should output this:

s1: C----GGGTATCC-AA
s2: CCCTAGG-T--CCCA-

The algorithm is as follows:

if(lengthOfs1 > lengthOfs2)
    if character mismatch
       put a dash on s2
    else
       put the original character
else if(lengthOfs1 <= lengthOfs2)
    if character mistmatch
       put a dash on s1
    else
       put the original character

I've tried to accomplish this by having the two original strings, and looping through a for loop until I hit '\0' in a string. Then I do the compares, and finally use something like:

strncpy(&s1_final_string[i + 1], &s1[i], 1) // if they are equal
strncpy(&s1_final_string[i], "-", 1); // if I need to put a dash

Is there a simple way to approach this situation and copy over a '-' character if we have a mismatch?

Upvotes: 1

Views: 49

Answers (1)

jschultz410
jschultz410

Reputation: 2899

Here's a greedy difference algorithm that matches your input and output. Please note that this algorithm does not find a minimal mismatch between any two strings. Instead, at each mismatch point it scans ahead in both strings to find the next matching points and uses whichever one is closer.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int greedy_diff_str(const char *s1, const char *s2, char **s1_final_ptr, char **s2_final_ptr)
{
  size_t s1_len = strlen(s1);
  size_t s2_len = strlen(s2);
  size_t s1_index = 0, s2_index = 0, final_index = 0;
  char  *s1_final, *s2_final;

  if (NULL == (s1_final = *s1_final_ptr = (char*) calloc(s1_len + s2_len + 1, 1)))
  {
    *s2_final_ptr = NULL;
    return -1;
  }

  if (NULL == (s2_final = *s2_final_ptr = (char*) calloc(s1_len + s2_len + 1, 1)))
  {
    free(s1_final);
    *s1_final_ptr = NULL;
    return -1;
  }

  while ('\0' != s1[s1_index] && '\0' != s2[s2_index])
  {
    if (s1[s1_index] == s2[s2_index])
    {
      s1_final[final_index]   = s1[s1_index++];
      s2_final[final_index++] = s2[s2_index++];
      //printf("s1: '%s'\ns2: '%s'\n", s1_final, s2_final);
    }
    else
    {
      size_t s1_dashes, s2_dashes, i;

      /* count how many dashes we'd have to add to s1 to reach next match point with s2 */

      for (i = s2_index + 1; '\0' != s2[i] && s1[s1_index] != s2[i]; ++i);
      s1_dashes = i - s2_index;

      /* count how many dashes we'd have to add to s2 to reach next match point with s1 */

      for (i = s1_index + 1; '\0' != s1[i] && s2[s2_index] != s1[i]; ++i);
      s2_dashes = i - s1_index;

      //printf("mismatch at s1[%lu] = '%c'; s2[%lu] = '%c'; s1_dashes = %lu; s2_dashes = %lu\n", s1_index, s1[s1_index], s2_index, s2[s2_index], s1_dashes, s2_dashes); 

      /* pick whichever path results in less dashes; break ties by adding dashes to string from which we've consumed more */

      if (s1_dashes < s2_dashes || (s1_dashes == s2_dashes && s1_index >= s2_index))
      {
        while (s1_dashes--)
        {
          s1_final[final_index]   = '-';
          s2_final[final_index++] = s2[s2_index++];
        }        
      }
      else
      {
        while (s2_dashes--)
        {
          s1_final[final_index]   = s1[s1_index++];
          s2_final[final_index++] = '-';
        }
      }

      //printf("s1: '%s'\ns2: '%s'\n", s1_final, s2_final);
    }
  }

  for (; '\0' != s1[s1_index]; ++s1_index, ++final_index)
  {
    s1_final[final_index] = s1[s1_index];
    s2_final[final_index] = '-';
    //printf("s1: '%s'\ns2: '%s'\n", s1_final, s2_final);
  }

  for (; '\0' != s2[s2_index]; ++s2_index, ++final_index)
  {
    s1_final[final_index] = '-';
    s2_final[final_index] = s2[s2_index];
    //printf("s1: '%s'\ns2: '%s'\n", s1_final, s2_final);
  }

  s1_final[final_index] = '\0';
  s2_final[final_index] = '\0';

  return 0;
}

int main()
{
  char s1[] = "CGGGTATCCAA", s2[] = "CCCTAGGTCCCA", *s1_fin, *s2_fin;

  printf("Input:\n");
  printf("s1: '%s'\n", s1);
  printf("s2: '%s'\n", s2);

  greedy_diff_str(s1, s2, &s1_fin, &s2_fin);

  printf("Output:\n");
  printf("s1: '%s'\n", s1_fin);
  printf("s2: '%s'\n", s2_fin);

  return 0;
}

Here's the output of a run:

john-schultzs-macbook-pro:~ jschultz$ ./a.out
Input:
s1: 'CGGGTATCCAA'
s2: 'CCCTAGGTCCCA'
Output:
s1: 'C----GGGTATCC-AA'
s2: 'CCCTAGG-T--CCCA-'

Upvotes: 1

Related Questions