Prabu
Prabu

Reputation: 1255

String Formatting using C / C++

Recently I was asked in an interview to convert the string "aabbbccccddddd" to "a2b3c4d5". The goal is to replace each repeated character with a single occurrence and a repeat count. Here 'a' is repeated twice in the input, so we have to write it as 'a2' in the output. Also I need to write a function to reverse the format back to the original one (e.g. from the string "a2b3c4d5" to "aabbbccccddddd"). I was free to use either C or C++. I wrote the below code, but the interviewer seemed to be not very happy with this. He asked me to try a smarter way than this.

In the below code, I used formatstring() to eliminate repeated chars by just adding the repeated count and used reverseformatstring() to convert back to the original string.

void formatstring(char* target, const char* source) {
  int charRepeatCount = 1;
  bool isFirstChar = true;
  while (*source != '\0') {
    if (isFirstChar) {
      // Always add the first character to the target
      isFirstChar = false;
      *target = *source;
      source++; target++;
    } else {
      // Compare the current char with previous one,
      // increment repeat count
      if (*source == *(source-1)) {
        charRepeatCount++;
        source++;
      } else {
        if (charRepeatCount > 1) {
          // Convert repeat count to string, append to the target
          char repeatStr[10];
          _snprintf(repeatStr, 10, "%i", charRepeatCount);
          int repeatCount = strlen(repeatStr);
          for (int i = 0; i < repeatCount; i++) {
            *target = repeatStr[i];
            target++;
          }
          charRepeatCount = 1; // Reset repeat count
        }
        *target = *source;
        source++; target++;
      }
    }
  }
  if (charRepeatCount > 1) {
    // Convert repeat count to string, append it to the target
    char repeatStr[10];
    _snprintf(repeatStr, 10, "%i", charRepeatCount);
    int repeatCount = strlen(repeatStr);
    for (int i = 0; i < repeatCount; i++) {
      *target = repeatStr[i];
      target++;
    }
  }
  *target = '\0';
}

void reverseformatstring(char* target, const char* source) {
  int charRepeatCount = 0;
  bool isFirstChar = true;
  while (*source != '\0') {
    if (isFirstChar) {
      // Always add the first character to the target
      isFirstChar = false;
      *target = *source;
      source++; target++;
    } else {
      // If current char is alpha, add it to the target
      if (isalpha(*source)) {
        *target = *source;
        target++; source++;
      } else {
        // Get repeat count of previous character
        while (isdigit(*source)) {
          int currentDigit = (*source) - '0';
          charRepeatCount = (charRepeatCount == 0) ?
              currentDigit : (charRepeatCount * 10 + currentDigit);
          source++;
        }
        // Decrement repeat count as we have already written
        // the first unique char to the target
        charRepeatCount--; 
        // Repeat the last char for this count
        while (charRepeatCount > 0) {
          *target = *(target - 1);
          target++;
          charRepeatCount--;
        }
      }
    }
  }
  *target = '\0';
}

I didn't find any issues with above code. Is there any other better way of doing this?

Upvotes: 5

Views: 1033

Answers (8)

jthill
jthill

Reputation: 60565

Try to make do with less boilerplate:

#include <iostream>
#include <iterator>
#include <sstream>
using namespace std;

template<typename in_iter,class ostream>
void torle(in_iter i, ostream &&o)
{
        while (char c = *i++) {
                size_t n = 1;
                while ( *i == c )
                        ++n, ++i;
                o<<c<<n;
        }
}

template<class istream, typename out_iter>
void fromrle(istream &&i, out_iter o)
{
        char c; size_t n;
        while (i>>c>>n)
                while (n--) *o++=c;
}

int main()
{
    typedef ostream_iterator<char> to;
    string line; stringstream converted;
    while (getline(cin,line)) {
        torle(begin(line),converted);
        cout<<converted.str()<<'\n';
        fromrle(converted,ostream_iterator<char>(cout));
        cout<<'\n';
    }
}

Upvotes: 0

kotlomoy
kotlomoy

Reputation: 1430

My naive approach:

void pack( char const * SrcStr, char * DstBuf ) {

    char const * Src_Ptr = SrcStr;
    char * Dst_Ptr = DstBuf;

    char c = 0;
    int RepeatCount = 1;

    while( '\0' != *Src_Ptr ) {

        c = *Dst_Ptr = *Src_Ptr;
        ++Src_Ptr; ++Dst_Ptr;

        for( RepeatCount = 1; *Src_Ptr == c; ++RepeatCount ) {
            ++Src_Ptr;
        }

        if( RepeatCount > 1 ) {
            Dst_Ptr += sprintf( Dst_Ptr, "%i", RepeatCount );
            RepeatCount = 1;
        }
    }

    *Dst_Ptr = '\0';
};

void unpack( char const * SrcStr, char * DstBuf ) {

    char const * Src_Ptr = SrcStr;
    char * Dst_Ptr = DstBuf;

    char c = 0;

    while( '\0' != *Src_Ptr ) {

        if( !isdigit( *Src_Ptr ) ) {
            c = *Dst_Ptr = *Src_Ptr;
            ++Src_Ptr; ++Dst_Ptr;

        } else {
            int repeat_count = strtol( Src_Ptr, (char**)&Src_Ptr, 10 );
            memset( Dst_Ptr, c, repeat_count - 1 );
            Dst_Ptr += repeat_count - 1;
        }
    }

    *Dst_Ptr = '\0';
};

But if interviewer asks for error-handling than solution turns to be much more complex (and ugly). My portable approach:

#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>

// for MSVC
#ifdef _WIN32
    #define snprintf sprintf_s
#endif

int pack( char const * SrcStr, char * DstBuf, size_t DstBuf_Size ) {

    int Err = 0;

    char const * Src_Ptr = SrcStr;
    char * Dst_Ptr = DstBuf;

    size_t SrcBuf_Size = strlen( SrcStr ) + 1;
    char const * SrcBuf_End = SrcStr + SrcBuf_Size;
    char const * DstBuf_End = DstBuf + DstBuf_Size;

    char c = 0;
    int RepeatCount = 1;

    // don't forget about buffers intercrossing
    if( !SrcStr || !DstBuf || 0 == DstBuf_Size \
        || (DstBuf < SrcBuf_End && DstBuf_End > SrcStr) ) {

        return 1;
    }

    // source string must contain no digits
    // check for destination buffer overflow
    while( '\0' != *Src_Ptr && Dst_Ptr < DstBuf_End - 1 \
        && !isdigit( *Src_Ptr ) && !Err ) {

        c = *Dst_Ptr = *Src_Ptr;
        ++Src_Ptr; ++Dst_Ptr;

        for( RepeatCount = 1; *Src_Ptr == c; ++RepeatCount ) {
            ++Src_Ptr;
        }

        if( RepeatCount > 1 ) {
            int res = snprintf( Dst_Ptr, DstBuf_End - Dst_Ptr - 1, "%i" \
                , RepeatCount );
            if( res < 0 ) {
                Err = 1;
            } else {
                Dst_Ptr += res;
                RepeatCount = 1;
            }
       }
    }

    *Dst_Ptr = '\0';

    return Err;
};

int unpack( char const * SrcStr, char * DstBuf, size_t DstBuf_Size ) {

    int Err = 0;

    char const * Src_Ptr = SrcStr;
    char * Dst_Ptr = DstBuf;

    size_t SrcBuf_Size = strlen( SrcStr ) + 1;
    char const * SrcBuf_End = SrcStr + SrcBuf_Size;
    char const * DstBuf_End = DstBuf + DstBuf_Size;

    char c = 0;

    // don't forget about buffers intercrossing
    // first character of source string must be non-digit
    if( !SrcStr || !DstBuf || 0 == DstBuf_Size \
        || (DstBuf < SrcBuf_End && DstBuf_End > SrcStr) || isdigit( SrcStr[0] ) ) {

        return 1;
    }

    // check for destination buffer overflow
    while( '\0' != *Src_Ptr && Dst_Ptr < DstBuf_End - 1 && !Err ) {

        if( !isdigit( *Src_Ptr ) ) {
            c = *Dst_Ptr = *Src_Ptr;
            ++Src_Ptr; ++Dst_Ptr;

        } else {
            int repeat_count = strtol( Src_Ptr, (char**)&Src_Ptr, 10 );
            if( !repeat_count || repeat_count - 1 > DstBuf_End - Dst_Ptr - 1 ) { 
                Err = 1;
            } else {
                memset( Dst_Ptr, c, repeat_count - 1 );
                Dst_Ptr += repeat_count - 1;
            }
        }
    }

    *Dst_Ptr = '\0';

    return Err;
};

int main() {

    char str[] = "aabbbccccddddd";
    char buf1[128] = {0};
    char buf2[128] = {0};

    pack( str, buf1, 128 );
    printf( "pack: %s -> %s\n", str, buf1 );

    unpack( buf1, buf2, 128 );
    printf( "unpack: %s -> %s\n", buf1, buf2 );

    return 0;
}

Test: http://ideone.com/Y7FNE3. Also works in MSVC.

Upvotes: 0

Filipe Gon&#231;alves
Filipe Gon&#231;alves

Reputation: 21233

I think your code is too complex for the task. Here's my approach (using C):

#include <ctype.h>
#include <stdio.h>

void format_str(char *target, char *source) {
    int count;
    char last;
    while (*source != '\0') {
        *target = *source;
        last = *target;
        target++;
        source++;
        for (count = 1; *source == last; source++, count++)
            ; /* Intentionally left blank */
        if (count > 1)
            target += sprintf(target, "%d", count);
    }
    *target = '\0';
}

void convert_back(char *target, char *source) {
    char last;
    int val;
    while (*source != '\0') {
        if (!isdigit((unsigned char) *source)) {
            last = *source;
            *target = last;
            target++;
            source++;
        }
        else {
            for (val = 0; isdigit((unsigned char) *source); val = val*10 + *source - '0', source++)
                ; /* Intentionally left blank */
            while (--val) {
                *target = last;
                target++;
            }
        }
    }
    *target = '\0';
}

format_str compresses the string, and convert_back uncompresses it.

Upvotes: 5

Adam Liss
Adam Liss

Reputation: 48330

Since several others have suggested very reasonable alternatives, I'd like to offer some opinions on what I think is your underlying question: "He asked me to try a smarter way than this.... Is there any other better way of doing this?"

When I interview a developer, I'm looking for signals that tell me how she approaches a problem:

  1. Most important, as H2CO3 noted, is correctness: will the code work? I'm usually happy to overlook small syntax errors (forgotten semicolons, mismatched parens or braces, and so on) if the algorithm is sensible.

  2. Proper use of the language, especially if the candidate claims expertise or has had extensive experience. Does he understand and use idioms appropriately to write straightforward, uncomplicated code?

  3. Can she explain her train of thought as she formulates her solution? Is it logical and coherent, or is it a shotgun approach? Is she able and willing to communicate well?

  4. Does he account for edge cases? And if so, does the intrinsic algorithm handle them, or is everything a special case? Although I'm happiest if the initial algorithm "just works" for all cases, I think it's perfectly acceptable to start with a verbose approach that covers all cases (or simply to add a "TODO" comment, noting that more work needs to be done), and then simplifying later, when it may be easier to notice patterns or duplicated code.

  5. Does she consider error-handling? Usually, if a candidate starts by asking whether she can assume the input is valid, or with a comment like, "If this were production code, I'd check for x, y, and z problems," I'll ask what she would do, then suggest she focus on a working algorithm for now and (maybe) come back to that later. But I'm disappointed if a candidate doesn't mention it.

  6. Testing, testing, testing! How will the candidate verify his code works? Does he walk through the code and suggest test cases, or do I need to remind him? Are the test cases sensible? Will they cover the edge cases?

  7. Optimization: as a final step, after everything works and has been validated, I'll sometimes ask the candidate if she can improve her code. Bonus points if she suggests it without my prodding; negative points if she spends a lot of effort worrying about it before the code even works.


Applying these ideas to the code you wrote, I'd make these observations:

Using const appropriately is a plus, as it shows familiarity with the language. During an interview I'd probably ask a question or two about why/when to use it.

The proper use of char pointers throughout the code is a good sign. I tend to be pedantic about making the data types explicit within comparisons, particularly during interviews, so I'm happy to see, e.g. while (*source != '\0') rather than the (common, correct, but IMO less careful) while(*source).

isFirstChar is a bit of a red flag, based on my "edge cases" point. When you declare a boolean to keep track of the code's state, there's often a way of re-framing the problem to handle the condition intrinsically. In this case, you can use charRepeatCount to decide if this is the first character in a possible series, so you won't need to test explicitly for the first character in the string.

By the same token, repeated code can also be a sign that an algorithm can be simplified. One improvement would be to move the conversion of charRepeatCount to a separate function. See below for an even better solution.

It's funny, but I've found that candidates rarely add comments to their code during interviews. Kudos for helpful ones, negative points for those of the ilk "Increment the counter" that add verbosity without information. It's generally accepted that, unless you're doing something weird (in which case you should reconsider what you've written), you should assume the person who reads your code is familiar with the programming language. So comments should explain your thought process, not translate the code back to English.

Excessive levels of nested conditionals or loops can also be a warning. You can eliminate one level of nesting by comparing each character to the next one instead of the previous one. This works even for the last character in the string, because it will be compared to the terminating null character, which won't match and can be treated like any other character.

There are simpler ways to convert charRepeatCount from an int to a string. For example, _snprintf() returns the number of bytes it "prints" to the string, so you can use
target += _snprintf(target, 10, "%i", charRepeatCount);

In the reversing function, you've used the ternary operator perfectly ... but it's not necessary to special-case the zero value: the math is the same regardless of its value. Again, there are also standard utility functions like atoi() that will convert the leading digits of a string into an integer for you.

Experienced developers will often include the increment or decrement operation as part of the condition in a loop, rather than as a separate statement at the bottom: while(charRepeatCount-- > 0). I'd raise an eyebrow but give you a point or two for humor and personality if you wrote this using the slide operator: while (charRepeatCount --> 0). But only if you'd promise not to use it in production.

Good luck with your interviewing!

Upvotes: 7

kunal
kunal

Reputation: 966

try this

std::string str="aabbbccccddddd";

for(int i=0;i<255;i++)
{
    int c=0;
    for(int j=0;j<str.length();j++)
    {
        if(str[j] == i)
            c++;
    }
    if(c>0)
    printf("%c%d",i,c);
}

Upvotes: 0

jrok
jrok

Reputation: 55425

Perhaps the interviewer wanted to test your knowledge of existing standard library tools. Here's how my take could look in C++:

#include <string>
#include <sstream>
#include <algorithm>
#include <iostream>

typedef std::string::const_iterator Iter;

std::string foo(Iter first, Iter last)
{
    Iter it = first;
    std::ostringstream result;
    while (it != last) {
        it = std::find_if(it, last, [=](char c){ return c != *it; });
        result << *first << (it - first);
        first = it;
    }
    return result.str();    
}

int main()
{
    std::string s = "aaabbbbbbccddde";
    std::cout << foo(s.begin(), s.end());
}

An extra check is needed for empty input.

Upvotes: 0

user529758
user529758

Reputation:

The approach/algorithm is fine, perhaps you could refine and shrink the code a bit (by doing something simpler, there's no need to solve this in an overly complex way). And choose an indentation style that actually makes sense.

A C solution:

void print_transform(const char *input)
{
    for (const char *s = input; *s;) {
        char current = *s;
        size_t count = 1;
        while (*++s == current) {
            count++;
        }

        if (count > 1) {
            printf("%c%zu", current, count);
        } else {
            putc(current, stdout);
        }
    }

    putc('\n', stdout);
}

(This can be easily modified so that it returns the transformed string instead, or writes it to a long enough buffer.)

A C++ solution:

std::string transform(const std::string &input)
{
    std::stringstream ss;
    std::string::const_iterator it = input.begin();

    while (it != input.end()) {
        char current = *it;
        std::size_t count = 1;
        while (++it != input.end() && *it == current) {
            count++;
        }

        if (count > 1) {
            ss << current << count;
        } else {
            ss << current;
        }
    }

    return ss.str();
}

Upvotes: 7

Polentino
Polentino

Reputation: 907

Your code "works", but it doesn't adhere to some common patterns used in C++. You should have:

  • used std::string instead of plain char* array(s)
  • pass that string as const reference to avoid modification, since you write the result somewhere else;
  • use C++11 features such as ranged based for loops and lambdas as well.

I think the interviewer's purpose was to test your ability to deal with the C++11 standard, since the algorithm itself was pretty trivial.

Upvotes: 0

Related Questions