Reputation: 13
I have a file with columns like this:
TNFRSF14 chr1 2487803,2489164,2489781,2491261,2492062,2493111,2494303,2494586, 2488172,2489273,2489907,2491417,2492153,2493254,2494335,2497061,
ID3 chr1 23884420,23885425,23885617, 23884906,23885510,23886285,
In case the tabs cannot be seen on your browser:
TNFRSF14"\t"chr1"\t"2487803,2489164,2489781,2491261,2492062,2493111,2494303,2494586,"\t"2488172,2489273,2489907,2491417,2492153,2493254,2494335,2497061,
ID3"\t"chr1"\t"23884420,23885425,23885617,"\t"23884906,23885510,23886285,
I would like to have the output say:
TNFRSF14 chr1 2487803 2488172
TNFRSF14 chr1 2489164 2489273
...
ID3 chr1 23885425 23885510
ID3 chr1 23885617 23886285
As you can see, my original input is of varying lengths in columns 3 and 4, but the length of column 3 will always equal column 4. So far I have been able to split the files into varying column lengths, and have a python script that can place them. I was hoping there was a way for awk to do this though!
Thanks for any suggestions!
Upvotes: 1
Views: 432
Reputation: 8164
you can try to use split function
gawk '{
split($3,a,",");
split($4,b,",");
for(i=1; i<length(a); i++){
print $1, $2, a[i], b[i];
}
}' input
Note: length(array) is gnu-awk specific
you get:
TNFRSF14 chr1 2487803 2488172 TNFRSF14 chr1 2489164 2489273 TNFRSF14 chr1 2489781 2489907 TNFRSF14 chr1 2491261 2491417 TNFRSF14 chr1 2492062 2492153 TNFRSF14 chr1 2493111 2493254 TNFRSF14 chr1 2494303 2494335 TNFRSF14 chr1 2494586 2497061 ID3 chr1 23884420 23884906 ID3 chr1 23885425 23885510 ID3 chr1 23885617 23886285
Upvotes: 4
Reputation: 203229
$ cat tst.awk
BEGIN{ FS=OFS="\t" }
{
n = split($3,a,/,/)
split($4,b,/,/)
for (i=1;i<n;i++) {
print $1, $2, a[i], b[i]
}
}
$
$ awk -f tst.awk file
TNFRSF14 chr1 2487803 2488172
TNFRSF14 chr1 2489164 2489273
TNFRSF14 chr1 2489781 2489907
TNFRSF14 chr1 2491261 2491417
TNFRSF14 chr1 2492062 2492153
TNFRSF14 chr1 2493111 2493254
TNFRSF14 chr1 2494303 2494335
TNFRSF14 chr1 2494586 2497061
ID3 chr1 23884420 23884906
ID3 chr1 23885425 23885510
ID3 chr1 23885617 23886285
Upvotes: 2
Reputation: 4841
awk -F',? ' '
{
split($3, a, /,/)
split($4, b, /,/)
for (i in a) print $1, $2, a[i], b[i]
}' file
Upvotes: 1