qwerty ayyy
qwerty ayyy

Reputation: 385

splitting a string into a multidimensional array

I have a list of strings. For each one, I want to check whether it contains a specific word and, if it does, split the string into its individual words and add them to an associative array.

myString = ['RT @Arsenal: Waiting for the international', 'We’re hungry for revenge @_nachomonreal on Saturday\'s match and aiming for a strong finish']

wordtoFind = ['@Arsenal']       

I want to loop through the wordtoFind and if it is in myString, split up myString into individual words and create an object like

newWord = {@Arsenal:[{RT:1},{Waiting:1},{for:1},{the:1},{international:1}]}

// Attempt: for each search word, scan every string and split the ones that contain it.
for(z=0; z <wordtoFind.length; z++){
  for ( i = 0 ; i < myString.length; i++) {
    // NOTE(review): wordtoFind holds plain strings, so `.key` evaluates to
    // undefined here -- confirm whether wordtoFind[z] itself was intended.
    if (myString[i].indexOf(wordtoFind[z].key) > -1){
      // The array returned by split() is not stored anywhere.
      myString[i].split(" ")
    }
  }
}

Upvotes: 0

Views: 254

Answers (4)

PostCrafter
PostCrafter

Reputation: 655

This method makes use of the forEach function and callbacks. The containsWord function was left with a for-loop for now to keep the number of callbacks down; this can obviously be changed.

// Sample input: each element is one string to search in.
var myString = [
    'RT @Arsenal: Waiting for the international',
    'We’re hungry for revenge @_nachomonreal on Saturday\'s match and aiming for a strong finish',
    '@Arsenal: one two three four two four three four three four'
];

// Words to look for in the strings of myString.
var wordtoFind = ['@Arsenal'];

// Preprocessor applied to a word before the equality check:
// it removes the first ':' found in the word (any later colon is kept).
function preprocessor(word) {
    var colonAt = word.indexOf(':');
    if (colonAt === -1) {
        return word;
    }
    return word.slice(0, colonAt) + word.slice(colonAt + 1);
}

/**
 * For every search word, counts the words of each string that contains it.
 *
 * The work is entirely synchronous. When a callback is supplied it is invoked
 * exactly once with the finished result before this function returns; the
 * same object is also returned.
 *
 * @param {string[]} array - strings to search in
 * @param {string[]} search - words to look for
 * @param {function(Object)} [callback] - receives the result object
 * @param {function(string): string} [preprocessor] - handed to containsWord
 *        for every string that is checked
 * @returns {Object} the result object, filled through countOccurence
 */
function findOccurences(array, search, callback, preprocessor) {
    var result = {};
    // iterate the search strings that should be matched
    search.forEach(function(needle) {
        // iterate the array of strings that should be searched in
        array.forEach(function(haystack) {
            if (!containsWord(haystack, needle, preprocessor)) {
                return;
            }
            // count every word of the matching string under the search word
            haystack.split(' ').forEach(function(word) {
                countOccurence(result, needle, word);
            });
        });
    });
    // Report after the loops instead of from inside the last iteration: an
    // iteration counter never reaches its target when either input is empty,
    // which would leave the callback uncalled.
    if (callback) {
        callback(result);
    }
    return result;
}

/**
 * Tells whether `needle` is one of the space-separated words of `haystack`.
 *
 * @param {string} haystack - text that is split on single spaces
 * @param {string} needle - word that must match exactly (===)
 * @param {function(string): string} [preprocessor] - when given, applied to
 *        each word before it is compared
 * @returns {boolean} true as soon as one word matches, false otherwise
 */
function containsWord(haystack, needle, preprocessor) {
    var tokens = haystack.split(' ');
    var found = false;
    // the loop condition stops the scan right after the first match
    for (var idx = 0; idx < tokens.length && !found; idx++) {
        var candidate = preprocessor ? preprocessor(tokens[idx]) : tokens[idx];
        found = candidate === needle;
    }
    return found;
}

/**
 * Records one occurrence of `word` under `key` in `result`.
 *
 * @param {Object} result - accumulator; result[key] maps each word to its count
 * @param {string} key - the search word the occurrence belongs to
 * @param {string} word - the word that was seen
 */
function countOccurence(result, key, word) {
    var owns = Object.prototype.hasOwnProperty;
    // Keep the counts in a plain object. An array used as a dictionary loses
    // its named entries in JSON.stringify, and a numeric word such as "5"
    // would be treated as an array index.
    if (!owns.call(result, key)) {
        result[key] = {};
    }
    var entry = result[key];
    // set the count to 0 if it doesn't exist yet
    if (!owns.call(entry, word)) {
        entry[word] = 0;
    }
    entry[word]++;
}

// call our function to find the occurrences
findOccurences(myString, wordtoFind, function(result) {
    // do something with the result
    console.log(result);
}, preprocessor);

// output:
/*
 {
   '@Arsenal': {
     RT: 1,
     '@Arsenal:': 2,
     Waiting: 1,
     for: 1,
     the: 1,
     international: 1,
     one: 1,
     two: 2,
     three: 3,
     four: 4
   }
 }
 */

Feel free to ask any questions, if the answer needs clarification.

I hope this fits your needs.

Upvotes: 1

Yan Yang
Yan Yang

Reputation: 1974

I think the key problem you are stuck on is the data structure. The optimal structure would be something like this:

{
    @Arsenal:[
        {RT:1, Waiting:1, for:1, the:1, international:1},
        {xxx:1, yyy:1, zzz:3}, //for there are multiple ones in 'myString' that contain the same '@Arsenal'
        {slkj:1, sldjfl:2, lsdkjf:1} //maybe more
    ]
    someOtherWord:[
        {},
        {},
        ....
    ]
}

And the code:

// Accumulator: maps each search word to an array of word-count objects, one per matching string.
var result = {};

/**
 * Counts how often each word occurs in `string`, leaving out `key` itself.
 * Returns an object like {RT:1, Waiting:1, for:1, the:1, international:1}.
 *
 * @param {string} string - text that is split on single spaces
 * @param {string} key - word that is excluded from the counts
 * @returns {Object} map from word to number of occurrences
 */
function calculateCount(string, key) {
    var wordCounts = {};
    var owns = Object.prototype.hasOwnProperty;
    string.split(" ").forEach(function (word) {
        // Leading, trailing or repeated spaces produce empty tokens; those are not words.
        if (word === "" || word === key) return;
        // Test for an own property: comparing with undefined is fooled by names
        // inherited from Object.prototype (e.g. "constructor"), whose count
        // would then become NaN.
        if (owns.call(wordCounts, word)) wordCounts[word]++;
        else wordCounts[word] = 1;
    });
    return wordCounts;
}

// For each word to find, collect one word-count object per string that
// contains it, e.g. {RT:1, Waiting:1, for:1, the:1, international:1}.
// The list is named `wordtoFind` (lowercase "t"), as in the question;
// `wordToFind` is not defined anywhere and would throw a ReferenceError.
wordtoFind.forEach(function (word) {
    var current = result[word] = [];
    myString.forEach(function (str) {
        if (str.indexOf(word) > -1) {
            current.push(
                calculateCount(str, word)
            );
        }
    });
});

Upvotes: 0

Poul Kruijt
Poul Kruijt

Reputation: 71901

I would say something like this would work; it also counts the number of occurrences of each word in a sentence. JavaScript does not have associative arrays like PHP, for instance. It just has objects and numerically indexed arrays:

var myString = ['RT @Arsenal: Waiting for the international', 'We’re hungry for revenge @_nachomonreal on Saturday\'s match and aiming for a strong finish'];

var wordtoFind = ['@Arsenal'];

/**
 * Builds, for every search word that occurs in at least one string, an object
 * mapping each other word of the matching strings to its number of occurrences.
 *
 * @param {string[]} strings - sentences to scan; split on single spaces
 * @param {string[]} searchWords - words to look for (substring match)
 * @returns {Object} e.g. {"@Arsenal": {"RT": 1, "Waiting": 1}}
 */
function countWordsByMatch(strings, searchWords) {
    var owns = Object.prototype.hasOwnProperty;
    var counts = {};

    for(var i = 0, l = searchWords.length; i < l; i++) {
        var searchWord = searchWords[i];

        for(var ii = 0, ll = strings.length; ii < ll; ii++) {
            if(strings[ii].indexOf(searchWord) === -1) {
                continue;
            }
            // Reuse the entry created by an earlier matching string so that its
            // counts are added to instead of being overwritten.
            if(!owns.call(counts, searchWord)) {
                counts[searchWord] = {};
            }
            var resultpart = counts[searchWord];
            var split = strings[ii].split(' ');
            for(var iii = 0, lll = split.length; iii < lll; iii++) {
                var token = split[iii];
                // Compare without trailing punctuation as well, so that
                // "@Arsenal:" is recognised as the search word and left out.
                if(token === searchWord || token.replace(/[:;,.!?]+$/, '') === searchWord) {
                    continue;
                }
                if(!owns.call(resultpart, token)) {
                    resultpart[token] = 0;
                }
                resultpart[token]++;
            }
        }
    }
    return counts;
}

var result = countWordsByMatch(myString, wordtoFind);

console.log(result); 
//{"@Arsenal":{"RT":1,"Waiting":1,"for":1,"the":1,"international":1}}

Upvotes: 2

senschen
senschen

Reputation: 804

You're on the right track. You just need to store the split string into the associative array variable.

// Collects every word of each string that contains one of the search words.
var assocArr = [];
// The loop counters are declared with var: assigning to undeclared `z` / `i`
// creates implicit globals and throws a ReferenceError in strict mode.
for(var z = 0; z < wordtoFind.length; z++){
     for (var i = 0; i < myString.length; i++) {
         if (myString[i].indexOf(wordtoFind[z]) > -1){

             myString[i].split(" ").forEach(function(word){
                 assocArr.push(word);
             });

         }
     }
}

Upvotes: 0

Related Questions