Reputation: 2675
How can I go about writing a function that compares two strings and returns an object containing the characters that occur in both strings, and how many times they occur?
Here is some example input and output:
"Leopard", "Lion" // { "L": 2, "o": 2 }
"Hello", "World" // { "l": 3, "o": 2 }
"Flower", "Power" // { "o": 2, "w": 2, "e": 2, "r": 2 }
"phone", "Memphis" // { "p": 2, "h": 2, "e": 2 }
Upvotes: 4
Views: 1309
Reputation:
If you're working with small strings, there isn't any need to complicate things. The following method is simple, at the expense of performance when dealing with very large strings.
First we need to define an object to hold the counts for our duplicate characters.
Then we need to iterate each string. There are many way of going about iterating a string, but in the following examples I either:
Array#forEach
on the string using the Function#call
method (ES5 example), orfor...of
loop (ES6 example).Both of these methods work because a string is an example of a built-in iterable object.
While iterating the strings, we need to first check if the character exists in the duplicate object.
Using this method, you don't count characters that aren't duplicates, and you don't try to verify characters that you have already verified. This should offer some significant speed improvements with longer strings.
Once you have finished iterating both strings, simply return the duplicate holder object.
const countLetters = (v1, v2) => {
const out = {};
for(let char of v1) (out[char] || v2.indexOf(char) !== -1) && (out[char] = (out[char] || 0) + 1);
for(let char of v2) (out[char] || v1.indexOf(char) !== -1) && (out[char] = (out[char] || 0) + 1);
return out;
}
const test = (v1,v2) => (console.log(JSON.stringify(countLetters(v1, v2))),test);
test("Leopard", "Lion") // { "L": 2, "o": 2 }
("Hello", "World") // { "l": 3, "o": 2 }
("Flower", "Power") // { "o": 2, "w": 2, "e": 2, "r": 2 }
("phone", "Memphis") // { "p": 2, "h": 2, "e": 2 }
("Bookkeeper", "Zookeeper") // { "o": 4, "k": 3, "e": 6, "p": 2, "r": 2 }
("Some thing", "Other thing"); // { "e": 2, " ": 2, "t": 3, "h": 3, "i": 2, "n": 2, "g": 2 }
var countLetters = function(v1, v2) {
var out = {};
Array.prototype.forEach.call(v1, function(char) {
if(out[char] || v2.indexOf(char) !== -1) out[char] = (out[char] || 0) + 1;
});
Array.prototype.forEach.call(v2, function(char) {
if(out[char] || v1.indexOf(char) !== -1) out[char] = (out[char] || 0) + 1;
});
return out;
}
var test = function(v1,v2){ return (console.log(JSON.stringify(countLetters(v1, v2))),test) };
test("Leopard", "Lion") // { "L": 2, "o": 2 }
("Hello", "World") // { "l": 3, "o": 2 }
("Flower", "Power") // { "o": 2, "w": 2, "e": 2, "r": 2 }
("phone", "Memphis") // { "p": 2, "h": 2, "e": 2 }
("Bookkeeper", "Zookeeper") // { "o": 4, "k": 3, "e": 6, "p": 2, "r": 2 }
("Some thing", "Other thing"); // { "e": 2, " ": 2, "t": 3, "h": 3, "i": 2, "n": 2, "g": 2 }
Performance becomes a concern when you start working with very large strings. The following method employs some algorithmic wizardry to gain performance.
First, find the frequency of each letter in both strings. Working with a sorted array is faster than working with an unsorted array, and saves some time counting because we can split the string into groups of common characters instead of counting each character.
I use a Map
object to take advantage of the Map#has
method—which is much faster than Array#indexOf
—in the next function.
// Calculate the frequency of each character in a string
const calculateCharacterFrequency = string => {
// Split the string into individual characters
const array = [...string];
// Sort the split string
const sorted = array.sort();
// Join the split string
const joined = sorted.join("");
// Split the string into groups of common characters
const matches = joined.match(/(.)\1*/g);
// Iterate the groups
const frequency = matches.reduce(
(frequency, character) => {
// Insert char and frequency into map
frequency.set(character[0], character.length);
// return frequency map for use in next iteration
return frequency;
},
// This is the map that will be passed as `frequency`
new Map()
);
// Return the map
return frequency;
};
Next, find the common characters between each string and create a map containing the frequency of each common character.
The biggest performance gain here is to create a unique set of all of the characters that occur in either string. I utilize the unique nature of a Set
object to remove duplicates from the array, there are other ways of going about removing duplicates from an array but that was the fastest one that I tested. This way, we are only iterating one unqiue set and checking that the character occurs in both maps.
// Calculate the frequency of each character two strings have in common
const calculateCommonCharacterFrequency = (v1, v2) => {
// Get frequency map of first string
v1 = calculateCharacterFrequency(v1);
// Get frequency map of second string
v2 = calculateCharacterFrequency(v2);
// Create an array containing a list of all characters occuring in either string
const combinedCharacterArray = [...v1.keys(), ...v2.keys()];
// Remove duplicate characters
const combinedUniqueCharacterSet = new Set(combinedCharacters);
// Convert set back to array
const combinedUniqueCharacterArray = Array.from(combinedUniqueSet);
// Iterate the unique array of common characters
const commonFrequency = combinedUniqueCharacterArray.reduce(
(commonFrequency, character) => {
// Check if both strings have the character
const isCommon = v1.has(character) && v2.has(character);
if(isCommon) {
// Find the sum of the frequency of the character in both strings
const combinedFrequency = v1.get(character) + v2.get(character);
// Insert character and frequency into map
commonFrequency.set(character, combinedFrequency);
}
// return frequency map for use in next iteration
return commonFrequency;
},
// This is the map that will be passed as `commonFrequency`
new Map()
);
// Return the map
return commonFrequency;
};
The above example has been expanded for readability and explanation purposes. The following example has been condensed to save space.
const calcCharFreq = string => [...string].sort().join("").match(/(.)\1*/g).reduce((freq, char) => (freq.set(char[0], char.length), freq), new Map());
const calcCommonCharFreq = (v1, v2) => {
v1 = calcCharFreq(v1);
v2 = calcCharFreq(v2);
return Array.from(new Set([...v1.keys(), ...v2.keys()])).reduce((dup, char) => ((v1.has(char) && v2.has(char)) && dup.set(char, v1.get(char) + v2.get(char)), dup), new Map());
};
const test = (v1,v2) => (console.log('{ '+Array.from(calcCommonCharFreq(v1, v2)).reduce((pairs, value) => (pairs.push('"'+value[0]+'": '+value[1]), pairs), []).join(", ")+' }'), test);
test("Leopard", "Lion") // { "L": 2, "o": 2 }
("Hello", "World") // { "l": 3, "o": 2 }
("Flower", "Power") // { "o": 2, "w": 2, "e": 2, "r": 2 }
("phone", "Memphis") // { "p": 2, "h": 2, "e": 2 }
("Bookkeeper", "Zookeeper") // { "o": 4, "k": 3, "e": 6, "p": 2, "r": 2 }
("Some thing", "Other thing"); // { "e": 2, " ": 2, "t": 3, "h": 3, "i": 2, "n": 2, "g": 2 }
Upvotes: 5
Reputation: 216
Here is a more readable version of Redu's algo...
function count(v1, v2) {
let map = {};
Array.prototype.forEach.call(v1, char => map[char] = map[char]++ || 1);
Array.prototype.forEach.call(v2, char => {
if(map[char]) map[char] += 1;
});
for(let char in map) {
if(map[char] < 2) {
delete map[char];
}
}
console.log(map)
}
count('Lion', 'Leopard');
Upvotes: 0
Reputation:
I would start off with a simple function, called countChars
, to count the occurrences of characters in a single string. You can find this elsewhere on SO, but I provide a simple version. The output is an object, keyed by character, with counts as values.
We can then solve the problem of finding matching characters in two strings as a sort of merging of two objects, which we implement in a function called combineObjectsByKey
.
function countChars(str) {
return str.split('').reduce((result, chr) => {
if (!(chr in result)) result[chr] = 0;
result[chr]++;
return result;
}, {});
}
function combineObjectsByKey(o1, o2, fn) {
const result = {};
Object.keys(o1).forEach(key => {
if (key in o2) result[key] = fn(o1[key], o2[key]);
});
return result;
}
function add(a, b) { return a + b; }
function matchingCounts(str1, str2) {
return combineObjectsByKey(countChars(str1), countChars(str2), add);
}
console.log(matchingCounts("Leopard", "Lion"));
You could also simply concatenate the strings, count characters in the concatenated string, and then remove characters not occurring in both strings:
function matchingCounts(str1, str2) {
const counts = countChars(str1 + str2);
Object.keys(counts).forEach(chr => {
if (!str1.includes(chr) || !str2.includes(chr)) delete counts[chr];
});
return counts;
}
Upvotes: 0
Reputation: 470
Though this example can probably be more elegant, I think the approach is simpler and more readable.
Here's a Pen to play around with.
function getRepeated(x, y) {
let result = [];
x.split('').forEach(value => {
let array = testCharacterToString(value, y);
if (array.length > 0) {
result.push(array[0]);
}
});
y.split('').forEach(value => {
let array = testCharacterToString(value, x);
if (array.length > 0) {
result.push(array[0]);
}
});
result = result.reduce(function(prev, cur) {
prev[cur] = (prev[cur] || 0) + 1;
return prev;
}, {});
return JSON.stringify(result) // {"o":4,"k":3,"e":6,"p":2,"r":2}
}
function testCharacterToString(char, string) {
return string.split('').filter(value => {
return value === char;
});
}
var test = function(arg1,arg2){ return (console.log(getRepeated(arg1, arg2)),test) };
test("Leopard", "Lion") // { "L": 2, "o": 2 }
("Hello", "World") // { "l": 3, "o": 2 }
("Flower", "Power") // { "o": 2, "w": 2, "e": 2, "r": 2 }
("phone", "Memphis") // { "p": 2, "h": 2, "e": 2 }
("Bookkeeper", "Zookeeper") // { "o": 4, "k": 3, "e": 6, "p": 2, "r": 2 }
("Some thing", "Other thing"); // { "e": 2, " ": 2, "t": 3, "h": 3, "i": 2, "n": 2, "g": 2 }
Upvotes: 0
Reputation: 26161
This is a very nice question. If I got it right you may easily do in O(2n) as follows;
function relationIndex(s){
var a = s.split(/\s+/),
hash = a[0].length < a[1].length ? Array.prototype.reduce.call(a[1], (p,c) => (p[c] && (p[c] = Math.abs(p[c])+1),p), Array.prototype.reduce.call(a[0], (p,c) => (p[c] = --p[c] || -1,p),{}))
: Array.prototype.reduce.call(a[0], (p,c) => (p[c] && (p[c] = Math.abs(p[c])+1),p), Array.prototype.reduce.call(a[1], (p,c) => (p[c] = --p[c] || -1,p),{}));
for (var k in hash) hash[k] <= 0 && delete hash[k];
return hash;
}
var str = "Lion Leopard";
console.log(relationIndex(str));
str = "Brontosaurus Butterfly";
console.log(relationIndex(str));
str = "London Paris";
console.log(relationIndex(str));
str = "phone Memphis";
console.log(relationIndex(str));
Upvotes: 1
Reputation: 1996
Here is an option that should be FAST (basically because it is not doing anything fancy at all). This will account for character case sensitivity, all lower case a-z chars, all upper case A-Z chars, numeric 0-9 chars, and empty space chars.
here I just use parallel arrays for the ASCII char values of the chars for the first and second words.
var firstString = "hello world";
var secondString = "FOLLOW me";
var firstChars = [];
var secondChars = [];
var arrLength = 122;
// 0 - 122 covers ASCII values for all chars a-z (lowers), A-Z (uppers), numbers, and empty spaces.
//initialize array to arrLength number of elements, all set to 0.
for (var i = 0; i < arrLength; i++)
{
firstChars.push(0);
secondChars.push(0);
}
//count first word chars
for (var i = 0; i < firstString.length; i++)
firstChars[firstString.charCodeAt(i)]++;
//count second word chars
for (var i = 0; i < secondString.length; i++)
secondChars[secondString.charCodeAt(i)]++;
//output matching chars and the count of occurences.
for (var i = 0; i < arrLength; i++)
{
if (firstChars[i] > 0 && secondChars[i] > 0)
{
var letter = i == 32? "Empty Space" : String.fromCharCode(i);
var count = firstChars[i] + secondChars[i];
console.log(letter + " : " + count);
}
}
Here is a jsFiddle of this code to play around with and test.
Upvotes: 0