Reputation: 2150
I have a string input which consists of words. I am using regex.exec (g) to get all the words by function getWord(input)
So my input may look like this:
word word2
someword blah
What I get from exec is an array-like object that also contains the index
of the match. So it is an array like:
[ 'word', index: 0, input: "..."]
...
[ 'someword', index: 11, input: "..."]
...
What I need is to easily calculate that word "someword" is on line 2 by using the index(11) (as I don't have any other value telling me what is the number of lines)
Here is what I came up with: match '\n's until you hit a \n whose index is higher than the index of the word. I'm not sure whether this could be a problem in a 10k-line file.
Snippet for idea:
# Returns the 1-based line number for the character offset `index` in `input`.
# It starts at 1 and adds one for every "\n" whose offset is <= `index`, so an
# `index` that points at a newline character itself is reported as the next line.
getLineFromIndex: (index, input) ->
# A fresh /g regex per call: each exec() resumes after the previous match.
regex = /\n/g
line = 1
loop
match = regex.exec(input)
# Stop when no newline is left or the next newline lies past `index`.
break if not match? or match.index > index
line++
return line
A fairly big optimization can be done here. I can save the regex and the last match, so I won't iterate over the whole input every time I want to check a line number. The regex will then be executed only when the last match has a lower index than the current index.
This is the final idea with optimization:
###
@variable content [String] is input content
###
# Cached variant of the line lookup. The /\n/g matcher, the most recent newline
# match, the current line number and an end-of-input flag are stored on `this`
# (@lineMatcher, @lastMatch, @lastLine, @eof) so later calls resume scanning
# @content instead of starting over.
# NOTE(review): indentation was lost in this listing, so the exact nesting of
# the branches below is ambiguous; the notes assume the obvious reading.
# NOTE(review): there is no loop, so each call runs exec() at most once more;
# the result looks like it lags when `index` jumps over several lines. Confirm.
# NOTE(review): if @content contains no "\n", @lastMatch stays null and
# `@lastMatch.index` would throw before @eof is ever set. Confirm.
getLineFromIndex: (index) ->
@lineMatcher = @lineMatcher || /\n/g
@lastLine = @lastLine || 1
if @eof isnt true
# First call only: find the first newline; afterwards the cached match is kept.
@lastMatch = @lastMatch || @lineMatcher.exec(@content)
# Still before the cached newline (or input exhausted): line is unchanged.
if @eof or index < @lastMatch.index
return @lastLine
else
# Move on to the next newline; null means there are no more newlines.
match = @lineMatcher.exec(@content)
if not @eof and match is null
@eof = true
else
@lastMatch = match
@lastLine++
return @lastLine
Upvotes: 5
Views: 3632
Reputation: 4452
The original proposed solution by gwer is this:
/**
 * Computes the 1-based line number of the character offset `index` in `text`
 * by splitting everything before that offset on "\n".
 *
 * @param {string} text - Full text.
 * @param {number} index - Character offset into `text`.
 * @returns {number} Number of "\n"-separated pieces before `index`.
 */
function getLineNumber(text, index) {
  const prefix = text.slice(0, index);
  const pieces = prefix.split('\n');
  return pieces.length;
}
However there are faster solutions:
/**
 * Computes the 1-based line number of the character offset `index` in `text`
 * by counting the "\n" characters that precede it.
 *
 * @param {string} text - Full text.
 * @param {number} index - Character offset into `text`.
 * @returns {number} One plus the number of "\n" found before `index`.
 */
function getLineNumber(text, index) {
  const newlines = text.slice(0, index).match(/\n/g);
  // match() yields null (not an empty array) when nothing matched.
  if (!newlines) {
    return 1;
  }
  return newlines.length + 1;
}
This one is the fastest according to my limited benchmarking, and should also use the least memory since it does not manipulate the input text at all:
/**
 * Computes the 1-based line number of the character offset `index` in `text`
 * by walking the characters before it and counting "\n", without creating
 * any intermediate strings or arrays.
 *
 * @param {string} text - Full text.
 * @param {number} index - Character offset into `text`.
 * @returns {number} One plus the number of "\n" at positions below `index`.
 */
function getLineNumberB(text, index) {
  let lineCount = 1;
  let pos = 0;
  while (pos < index) {
    lineCount += text[pos] === '\n' ? 1 : 0;
    pos += 1;
  }
  return lineCount;
}
If you want to cope with different possible line endings, you can either preprocess the text (recommended):
// Rewrite CRLF pairs and lone CRs as "\n" so "\n"-only counting works.
// Each CRLF shrinks by one character, so offsets must be taken from the
// normalized text, not from the original.
text = text.replace(/\r\n|\r/g, '\n');
Or you can use these, more complex, solutions:
/**
 * Computes the 1-based line number of the character offset `index` in `text`,
 * treating "\r\n", a lone "\r" and a lone "\n" each as one line break.
 *
 * @param {string} text - Full text.
 * @param {number} index - Character offset into `text`.
 * @returns {number} One plus the number of line breaks before `index`.
 */
function getLineNumber(text, index) {
  const head = text.slice(0, index);
  const breaks = head.match(/\r\n|\r|\n/g);
  const breakCount = breaks ? breaks.length : 0;
  return breakCount + 1;
}
/**
 * Computes the 1-based line number of the character offset `index` in `text`
 * with a single pass over the characters before it. "\r\n", a lone "\r" and
 * a lone "\n" each count as one line break.
 *
 * @param {string} text - Full text.
 * @param {number} index - Character offset into `text`.
 * @returns {number} One plus the number of line breaks starting before `index`.
 */
function getLineNumber(text, index) {
  let lineNumber = 1;
  let pos = 0;
  while (pos < index) {
    const ch = text[pos];
    if (ch === '\r') {
      // Swallow the "\n" of a CRLF pair so the pair is counted only once.
      if (text[pos + 1] === '\n') {
        pos += 1;
      }
      lineNumber += 1;
    } else if (ch === '\n') {
      lineNumber += 1;
    }
    pos += 1;
  }
  return lineNumber;
}
Upvotes: 0
Reputation: 3939
Your pseudo-code seems to do the job. But I do not see how you can infer the line number by the offset of the searched word. I would split the input text by lines, then look over the array for the searched word, and if found return the line index.
// Sample two-line input; "someword" sits on the second line, which is
// index 1 once the text is split on "\n".
var input= "word word2 \n"+
"someword blah";
/**
 * Lists every line of `input` that contains `word`.
 *
 * @param {string} input - Text whose lines are separated by "\n".
 * @param {string} word - Substring to look for.
 * @returns {number[]} 0-based indices of the matching lines, in order.
 */
function getLinesNumberOf(input, word) {
  const lines = input.split("\n");
  const found = [];
  for (let lineIndex = 0; lineIndex < lines.length; lineIndex += 1) {
    if (lines[lineIndex].indexOf(word) !== -1) {
      found.push(lineIndex);
    }
  }
  return found;
}
// Logs the 0-based indices of the lines containing "someword": [1] for the sample input.
console.log(getLinesNumberOf(input,"someword"));
I have added support for multiple occurrences of the searched word.
edit
To avoid excessive memory consumption with large inputs, you can parse sequentially (the same advantage as SAX vs. DOM):
/**
 * Finds every line of `input` that contains `word`, scanning one line at a
 * time instead of splitting the whole text into an array first.
 *
 * The result is still logged to the console, as before, and is now also
 * returned so callers can use it.
 *
 * @param {string} word - Substring to look for.
 * @param {string} input - Text whose lines are separated by "\n".
 * @returns {number[]} 0-based indices of the lines containing `word`, in order.
 */
function getLinesNumberOf(word, input) {
  const lineNumbers = [];
  let currentLine = 0;
  let lineStart = 0;
  // `<=` rather than `<`: text ending in "\n" still has a final empty line,
  // and an empty input still counts as a single empty line.
  while (lineStart <= input.length) {
    const newlineOffset = input.indexOf("\n", lineStart);
    // No further newline: the last line runs to the end of the text. This
    // replaces appending "\n" to the input, which copied the whole string.
    const lineEnd = newlineOffset === -1 ? input.length : newlineOffset;
    if (input.slice(lineStart, lineEnd).indexOf(word) >= 0) {
      lineNumbers.push(currentLine);
    }
    // The next line begins right after the newline just consumed.
    lineStart = lineEnd + 1;
    currentLine += 1;
  }
  console.log(lineNumbers);
  return lineNumbers;
}
Upvotes: 0
Reputation: 614
Upvotes: 3