Reputation: 409
I am trying to find the longest sequence of digits in a string in bash, using awk
alone. I have put together the command below, and it gives me the expected output.
$ echo "This_is_1234_and_44448888_1234567_111111_23456789_and_234" | sed 's/./\n&/g' | awk '
# Input: one character per line (produced by the sed stage above).
# Collects every run of consecutive digits into numbers[i], then reports
# each run whose length equals the longest length seen.
BEGIN { max_length = 0 }
{
    if (match($1, /[0-9]/)) {
        # Digit: extend the run that is currently being collected.
        numbers[i] = numbers[i] $0
        non_digit = 0
    } else if (non_digit < 1) {
        # First non-digit after a run: advance to a fresh slot exactly once.
        non_digit = 2
        i++
    }
}
END {
    count = 0
    for (key in numbers) {
        current_length = length(numbers[key])
        if (current_length > max_length) {
            # New maximum: restart the list of candidates.
            max_length = current_length
            count = 0
        }
        if (current_length >= max_length) {
            max_length_strings[++count] = numbers[key]
        }
    }
    print "max_length for the consecutive number portion is ", max_length
    # Walk only slots 1..count: slots above count may still hold shorter
    # runs stored before the maximum was raised and must not be printed.
    for (j = 1; j <= count; j++) {
        print "String_Part: " max_length_strings[j] " and Length: " max_length
    }
}'
max_length for the consecutive number portion is 8
String_Part: 23456789 and Length: 8
String_Part: 44448888 and Length: 8
However, to split the string into individual characters for the digit check, I am
using the sed command.
So how can I avoid using sed and achieve the same result with a single awk
command?
Upvotes: 2
Views: 581
Reputation: 786291
Using just awk:
s="This_is_1234_and_44448888_1234567_111111_23456789_and_234"
awk -v RS='[^[:digit:]]+' 'length($0) >= max{
max=length($0)
num[max]=(num[max]?num[max] "," $0:$0)
}
END {
printf "max length=%s, numbers: %s\n", max, num[max]
}' <<< "$s"
max length=8, numbers: 44448888,23456789
Explanation:
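- RS='[^[:digit:]]+': we set the record separator to one or more non-digit characters, so each record is a run of digits.
- The max variable, updated in the length($0) >= max block, holds the longest length seen so far.
- The num array, filled in the same block, stores the comma-separated numbers for each length.
- In the END block we just print max and the matching num array entry.
- This is gnu-awk specific due to the multi-character RS.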
Upvotes: 1
Reputation: 204638
With GNU awk 4.* for FPAT and true multi-dimensional arrays:
$ cat tst.awk
# For each input line, print every digit run that has the maximum length
# on that line. Requires GNU awk 4+ (FPAT and arrays of arrays).
BEGIN { FPAT="[0-9]+" }     # each field is one run of consecutive digits
{
    # Start every line from scratch so results never leak between lines.
    delete strs
    max = 0
    for (i=1;i<=NF;i++) {
        cur = length($i)
        strs[cur][$i]       # group the runs by length; duplicates collapse
        # Every field, including the first, may raise the maximum.
        if (cur > max) {
            max = cur
        }
    }
    # Lines without digits have no fields and therefore nothing to report.
    if (NF > 0) {
        for (str in strs[max]) {
            printf "String_Part: %s and Length: %d\n", str, max
        }
    }
}
$ awk -f tst.awk file
String_Part: 23456789 and Length: 8
String_Part: 44448888 and Length: 8
The above assumes you want the output of the max for each input line and not across the whole file. If you do want it across the whole file then, again with GNU awk (for RT):
$ cat tst.awk
# Print every digit run that has the maximum length across the whole input.
# Requires GNU awk (regex RS, RT, and arrays of arrays).
BEGIN { RS="[0-9]+" }       # the digit runs themselves are the separators
# RT holds the text that matched RS; it is empty for the final record when
# the input does not end in a digit run, so skip that case.
RT != "" {
    cur = length(RT)
    strs[cur][RT]           # group the runs by length; duplicates collapse
    # Every run, including the very first one, may raise the maximum.
    if (cur > max) {
        max = cur
    }
}
END {
    # Nothing is stored when the input contained no digits at all.
    if (max in strs) {
        for (str in strs[max]) {
            printf "String_Part: %s and Length: %d\n", str, max
        }
    }
}
$ awk -f tst.awk file
String_Part: 23456789 and Length: 8
String_Part: 44448888 and Length: 8
Upvotes: 0