40mikemike
40mikemike

Reputation: 89

How to find the longest substring containing two unique repeating characters

The task is to find the longest substring in a given string that is composed of any two unique repeating characters
Ex. in an input string "aabadefghaabbaagad", the longest such string is "aabbaa"

I came up with the following solution but wanted to see if there is a more efficient way to do the same

import java.util.*; 

public class SubString {
    public static void main(String[] args) { 
    //String inStr="defghgadaaaaabaababbbbbbd";
    String inStr="aabadefghaabbaagad";
    //String inStr="aaaaaaaaaaaaaaaaaaaa";
    System.out.println("Input string is         "+inStr);

    StringBuilder sb = new StringBuilder(inStr.length());
    String subStr="";
    String interStr="";
    String maxStr="";
    int start=0,length=0, maxStart=0, maxlength=0, temp=0;

    while(start+2<inStr.length())   
    {    int i=0;
         temp=start;
         char x = inStr.charAt(start);
         char y = inStr.charAt(start+1);
         sb.append(x);
         sb.append(y);

         while( (x==y) && (start+2<inStr.length()) )
         {    start++;
              y = inStr.charAt(start+1);
              sb.append(y);
         }

         subStr=inStr.substring(start+2);
         while(i<subStr.length())
         {    if(subStr.charAt(i)==x || subStr.charAt(i)==y )
              {    sb.append(subStr.charAt(i));
                   i++;
              }
              else
                   break;
         }

         interStr= sb.toString();
         System.out.println("Intermediate string "+ interStr);
         length=interStr.length();
         if(maxlength<length)
         {    maxlength=length;
              length=0;
              maxStr = new String(interStr);
              maxStart=temp;
         }

         start++;
         sb.setLength(0);
   }

   System.out.println("");
   System.out.println("Longest string is "+maxStr.length()+" chars long "+maxStr);  
}
}

Upvotes: 8

Views: 16294

Answers (7)

dgupta3091
dgupta3091

Reputation: 1197

The idea here is to add occurrence of each character to a hashmap, and when the hasmap size increases more than k, remove the unwanted character.

private static int getMaxLength(String str, int k) {
        if (str.length() == k)
            return k;
        var hm = new HashMap<Character, Integer>();
        int maxLength = 0;
        int startCounter = 0;
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            if (hm.get(c) != null) {
                hm.put(c, hm.get(c) + 1);
            } else {
                hm.put(c, 1);
            }
            //atmost K different characters
            if (hm.size() > k) {
                maxLength = Math.max(maxLength, i - startCounter);
                while (hm.size() > k) {
                    char t = str.charAt(startCounter);
                    int count = hm.get(t);
                    if (count > 1) {
                        hm.put(t, count - 1);
                    } else {
                        hm.remove(t);
                    }
                    startCounter++;
                }
            }
        }
        return maxLength;
    }

Upvotes: 0

Jai
Jai

Reputation: 3310

The problem can be solved in O(n). Idea is to maintain a window and add elements to the window till it contains less or equal 2, update our result if required while doing so. If unique elements exceeds than required in window, start removing the elements from left side.

#code
from collections import defaultdict

def solution(s, k):
  length = len(set(list(s)))
  count_dict = defaultdict(int)
  if  length < k:
    return "-1"

  res = []
  final = []
  maxi = -1

  for i in range(0, len(s)):

    res.append(s[i])
    if len(set(res)) <= k:
      if len(res) >= maxi and len(set(res)) <= k :
        maxi = len(res)
        final = res[:]
        count_dict[maxi] += 1

    else:
      while len(set(res)) != k:
        res = res[1:]
      if maxi <= len(res) and len(set(res)) <= k:
        maxi =  len(res)
        final = res[:]
        count_dict[maxi] += 1
    return len(final)

print(solution(s, k))

Upvotes: 0

venky
venky

Reputation: 1

import java.util.ArrayList;
import java.util.Collections; 
import java.util.HashMap; import java.util.Iterator; import java.util.List;
import java.util.Map;

public class PrintLLargestSubString {

    public static void main(String[] args){         String string =
            "abcdefghijklmnopqrstuvbcdefghijklmnopbcsdcelfabcdefghi";
    List<Integer> list = new ArrayList<Integer> ();         List<Integer>
    keyList = new ArrayList<Integer> ();        List<Integer> Indexlist = new
    ArrayList<Integer> ();      List<Integer> DifferenceList = new
    ArrayList<Integer> ();      Map<Integer, Integer> map = new
    HashMap<Integer, Integer>();        int index = 0;      int len = 1;        int
    j=1;        Indexlist.add(0);       for(int i = 0; i< string.length() ;i++) {
        if(j< string.length()){
            if(string.charAt(i) < string.charAt(j)){
                len++;
                list.add(len);
            } else{
                index= i+1;
                Indexlist.add(index); //                    System.out.println("\nindex" + index);              
                len=1;              
            }           }           j++;        } //        System.out.println("\nlist" +list);         System.out.println("index List" +Indexlist); //     int n =
    Collections.max(list); //       int ind = Collections.max(Indexlist);
    //      System.out.println("Max number in IndexList  " +n);
    //      System.out.println("Index Max is " +ind);

    //Finding max difference in a list of elements      for(int diff = 0;
    diff< Indexlist.size()-1;diff++){           int difference =
            Indexlist.get(diff+1)-Indexlist.get(diff);
    map.put(Indexlist.get(diff), difference);
    DifferenceList.add(difference);         }
    System.out.println("Difference between indexes" +DifferenceList); //        Iterator<Integer> keySetIterator = map.keySet().iterator(); //      while(keySetIterator.hasNext()){
    //          Integer key = keySetIterator.next();
    //          System.out.println("index: " + key + "\tDifference  "
    +map.get(key)); // //       } //        System.out.println("Diffferenece List" +DifferenceList);        int maxdiff = Collections.max(DifferenceList);      System.out.println("Max diff is " + maxdiff);       //////      Integer
    value = maxdiff;        int key = 0;        keyList.addAll(map.keySet());
    Collections.sort(keyList);      System.out.println("List of al keys"
            +keyList); //       System.out.println(map.entrySet());         for(Map.Entry entry: map.entrySet()){           if(value.equals(entry.getValue())){
    key =  (int) entry.getKey();            }       }       System.out.println("Key value of max difference starting element is " + key);   

    //Iterating key list and finding next key value         int next = 0 ;
    int KeyIndex = 0;       int b;      for(b= 0; b<keyList.size(); b++) {
        if(keyList.get(b)==key){
            KeyIndex = b;           }                       }       System.out.println("index of key\t" +KeyIndex);         int nextIndex = KeyIndex+1;         System.out.println("next Index = " +nextIndex);         next = keyList.get(nextIndex);
            System.out.println("next Index value is = " +next);

            for( int z = KeyIndex; z < next ; z++) {
                System.out.print(string.charAt(z));         }   }

            }

Upvotes: 0

sinoTrinity
sinoTrinity

Reputation: 1195

A general solution: Longest Substring Which Contains K Unique Characters.

int longestKCharSubstring(string s, int k) {
    int i, max_len = 0, start = 0;
    // either unique char & its last pos
    unordered_map<char, int> ht;

    for (i = 0; i < s.size(); i++) {
        if (ht.size() < k || ht.find(s[i]) != ht.end()) {
            ht[s[i]] = i;
        } else {
            // (k + 1)-th char
            max_len = max(max_len, i - start);
            // start points to the next of the earliest char
            char earliest_char;
            int earliest_char_pos = INT_MAX;
            for (auto key : ht)
                if (key.second < earliest_char_pos)
                    earliest_char = key.first;
            start = ht[earliest_char] + 1;
            // replace earliest_char
            ht.erase(earliest_char);
            ht[s[i]] = i;
        }
    }
    // special case: e.g., "aaaa" or "aaabb" when k = 2
    if (k == ht.size())
        max_len = max(max_len, i - start);

    return max_len;
}

Upvotes: 0

Ismail Hawayel
Ismail Hawayel

Reputation: 2453

so the way I think of this is to solve it in 2 steps

  • scan the entire string to find continuous streams of the same letter
  • loop the extracted segments and condense them until u get a gap.

This way you can also modify the logic to scan for longest sub-string of any length not just 2.

class Program
{

    static void Main(string[] args)     
    {
        //.         
        string input = "aabbccdddxxxxxxxxxxxxxxxxx";
        int max_chars = 2;

        //.
        int flip = 0;

        var scanned = new List<string>();

        while (flip > -1)
        {
            scanned.Add(Scan(input, flip, ref flip));
        }

        string found = string.Empty;
        for(int i=0;i<scanned.Count;i++)
        {
            var s = Condense(scanned, i, max_chars);
            if (s.Length > found.Length)
            { 
                found = s;
            }
        }

        System.Console.WriteLine("Found:" + found);
        System.Console.ReadLine();


    }

    /// <summary>
    /// 
    /// </summary>
    /// <param name="s"></param>
    /// <param name="start"></param>
    /// <returns></returns>
    private static string Scan(string s, int start, ref int flip)
    { 
        StringBuilder sb = new StringBuilder();

        flip = -1;

        sb.Append(s[start]);

        for (int i = start+1; i < s.Length; i++)
        {
            if (s[i] == s[i - 1]) { sb.Append(s[i]); continue; } else { flip=i; break;}
        }

        return sb.ToString();
    }

    /// <summary>
    /// 
    /// </summary>
    /// <param name="list"></param>
    /// <param name="start"></param>
    /// <param name="repeat"></param>
    /// <param name="flip"></param>
    /// <returns></returns>
    private static string Condense(List<string> list, int start, int repeat)
    {
        StringBuilder sb = new StringBuilder();

        List<char> domain = new List<char>(){list[start][0]};

        for (int i = start; i < list.Count; i++)
        {
            bool gap = false;

            for (int j = 0; j < domain.Count; j++)
            {
                if (list[i][0] == domain[j])
                { 
                    sb.Append(list[i]);
                    break;
                }
                else if (domain.Count < repeat)
                {
                    domain.Add(list[i][0]);
                    sb.Append(list[i]);
                    break;
                }
                else
                {
                    gap=true;
                    break;
                }
            }

            if (gap) { break;}
        }

        return sb.ToString();
    }


}

Upvotes: 0

Alessio
Alessio

Reputation: 2068

Same question to me, I wrote this code

public int getLargest(char [] s){
    if(s.length<1) return s.length;
    char c1 = s[0],c2=' ';
    int start = 1,l=1, max=1;

    int i = 1;
    while(s[start]==c1){
        l++;
        start++;
        if(start==s.length) return start;
    }

    c2 = s[start];
    l++;

    for(i = l; i<s.length;i++){
        if(s[i]==c1 || s[i]==c2){
            if(s[i]!=s[i-1])
                start = i;
            l++;
        }
        else {
            l = i-start+1;  
            c1 = s[start];
            c2 = s[i];
            start = i;
        }
        max = Math.max(l, max);
    }
    return max;
}

Upvotes: 1

Aasmund Eldhuset
Aasmund Eldhuset

Reputation: 37990

Here's a hint that might guide you towards a linear-time algorithm (I assume that this is homework, so I won't give the entire solution): At the point where you have found a character that is neither equal to x nor to y, it is not necessary to go all the way back to start + 1 and restart the search. Let's take the string aabaaddaa. At the point where you have seen aabaa and the next character is d, there is no point in restarting the search at index 1 or 2, because in those cases, you'll only get abaa or baa before hitting d again. As a matter of fact, you can move start directly to index 3 (the first index of the last group of as), and since you already know that there is a contiguous sequene of as up to d, you can move i to index 5 and continue.

Edit: Pseudocode below.

// Find the first letter that is not equal to the first one, 
// or return the entire string if it consists of one type of characters
int start = 0;
int i = 1;
while (i < str.length() && str[i] == str[start])
    i++;
if (i == str.length())
    return str;

// The main algorithm
char[2] chars = {str[start], str[i]};
int lastGroupStart = 0;
while (i < str.length()) {
    if (str[i] == chars[0] || str[i] == chars[1]) {
        if (str[i] != str[i - 1])
            lastGroupStart = i;
    }
    else {
        //TODO: str.substring(start, i) is a locally maximal string; 
        //      compare it to the longest one so far
        start = lastGroupStart;
        lastGroupStart = i;
        chars[0] = str[start];
        chars[1] = str[lastGroupStart];
    }
    i++;
}
//TODO: After the loop, str.substring(start, str.length()) 
//      is also a potential solution.

Upvotes: 8

Related Questions