Reputation: 389
My File1 consists of ~100k entries and File2 of ~100 entries. I want to look up each string from File2 in File1 and print the matching line along with one line above and one line below it. For smaller files I used the following commands. Now I need to do this on a large file, and I also want all 3 entries on one line. Is there a better way? I don't have much experience.
# For each File2.txt line, print the two values found after the colon in the
# first field (the text on either side of the dash), separated by one space.
awk '{
  split($1, around_colon, ":")            # around_colon[2] is the text after ":"
  split(around_colon[2], bounds, "-")     # bounds[1], bounds[2]: either side of "-"
  print bounds[1] " " bounds[2]
}' File2.txt
# Print every File1.txt line containing one of the listed number pairs, plus
# one line of context before and after each match.
# -w   : the pair must match as whole words, so it cannot match inside longer
#        numbers (e.g. 16263604 6263708 or 6263604 62637081).
# -C 1 : one line of leading and trailing context.
grep -C 1 -w -E "6263604 6263708|130370901 130370975" File1.txt
File1
10 . 6263344 6263490
10 . 6263604 6263708
10 . 6264818 6264947
3 . 383595 383629
3 . 384667 384714
3 . 386272 386392
6 . 130370427 130376400
6 . 130370901 130370975
6 . 130372394 130372488
2 . 114379141 114379596
2 . 114379141 114379738
2 . 114379141 114384667
2 . 114383186 114383306
2 . 114384055 114384148
2 . 114384055 114384667
2 . 114384407 114384617
2 . 114384458 114384667
File2
10:6263604-6263708
6:130370901-130370975
Expected Output
10 . 6263344 6263490 6263604 6263708 6264818 6264947
6 . 130370427 130376400 130370901 130370975 130372394 130372488
Upvotes: 1
Views: 143
Reputation: 67467
$ awk '
# file2 is read with FS="[:-]", so id:start-end splits into $1, $2, $3.
# Key on all three parts so an equal start/end pair under another id
# does not count as a match.
NR==FNR {want[$1,$2,$3]; next}
# A match is pending: append the current (following) line and emit the row.
hit!="" {print hit, $3, $4; hit=""}
# Build the row from the matched line itself, so its own first two columns
# are printed even when the following line starts with different ones.
($1,$3,$4) in want {hit=$1 OFS $2 OFS prev OFS $3 OFS $4}
# Remember this line as the "line above" for the next record.
{prev=$3 OFS $4}
# The last line of file1 matched: there is no following line to append.
END {if(hit!="") print hit}' FS='[:-]' file2 FS=' ' file1
10 . 6263344 6263490 6263604 6263708 6264818 6264947
6 . 130370427 130376400 130370901 130370975 130372394 130372488
This assumes the numbers are unique, so there is no prefix match as in your sample script.
Upvotes: 3
Reputation: 203169
$ cat tst.awk
# Join each file1 line listed in file2 with the line before and after it.
# Usage: awk -f tst.awk file2 file1
#
# Runs of space, dot, colon or dash all separate fields, so a file2 entry
# such as 10:6263604-6263708 and a file1 line such as 10 . 6263604 6263708
# both split into the same three fields.
BEGIN { FS = "[ .:-]+" }

{
    span = $2 " " $3

    # First file: record the entry in the exact layout of a file1 line.
    if (NR == FNR) {
        wanted[$1 " . " span]
        next
    }

    # The previous line matched: complete its row with this line and print.
    if (pending != "") {
        print pending, span
        pending = ""
    }

    # The whole current line is a wanted entry: start a row with the
    # preceding span followed by this one.
    if ($0 in wanted)
        pending = $1 " . " last OFS span

    last = span
}

END {
    # The final line matched, so no following line exists to complete it.
    if (pending != "")
        print pending
}
.
$ awk -f tst.awk file2 file1
10 . 6263344 6263490 6263604 6263708 6264818 6264947
6 . 130370427 130376400 130370901 130370975 130372394 130372488
Upvotes: 2