Reputation: 193
sample.txt
has tab-separated columns, and the third column is a semicolon-separated list
whose entries (single numbers or dash-separated ranges) need to be split into one row per number, repeating the first two columns.
cat sample.txt
2 2627 588;577
2 2629 566
2 2685 568-564
2 2771 573
2 2773 597
2 2779 533
2 2799 558
2 6919 726;740-742;777
2 7295 761;771-772
Please note that some lines may contain an inverted (descending) range, e.g. 568-564.
Using my previous script, I managed to split the values on the semicolons, but failed to expand the ranges (the values separated by a dash).
#!/bin/sh
# Usage: <this-script> INPUT OUTPUT
#
# Appends column 1 of every line of the tab-separated INPUT to OUTPUT, then
# column 2 of every line, then column 3, and finally edits OUTPUT in place
# with sed. Each step runs only if the previous one succeeded (&& chain).
#
# $1 and $2 are quoted so that file names containing spaces or glob
# characters reach awk, sed and the >> redirection as a single word.
#
# NOTE(review): as written, the first sed expression is a caret anchor
# followed by the letter M, so it removes a leading "M"; presumably a literal
# carriage return (typed as Ctrl-V Ctrl-M) was intended -- confirm.
awk -F"\t" '{print $1}' "$1" >> "$2" &&
awk -F"\t" '{print $2}' "$1" >> "$2" &&
awk -F"\t" '{print $3}' "$1" >> "$2" &&
sed -i "s/^M//;s/;\r//g" "$2"
#!/bin/awk -f
# Input: sections of lines separated by empty lines; fields within a line are
# separated by ";". The first line of every section is skipped.
# Lines of section 1 and section 2 are remembered by their position in the
# section; for each line of section 3, one "s1,s2,field" row is printed per
# field, using the remembered lines at the same position.
BEGIN {
    FS = ";"
    section = 1
}

# An empty line closes the current section and restarts the line counter.
NF == 0 {
    section++
    row = 0
    next
}

{
    row++
    if (row == 1)
        next                    # first line of a section is not data

    if (section == 1)
        first[row] = $0
    else if (section == 2)
        second[row] = $0
    else if (section == 3)
        for (k = 1; k <= NF; k++)
            print first[row] "," second[row] "," $k
}
Expected
2,2627,588
2,2627,577
2,2629,566
2,2685,564
2,2685,565
2,2685,566
2,2685,567
2,2685,568
2,2771,573
2,2773,597
2,2779,533
2,2799,558
2,6919,726
2,6919,740
2,6919,741
2,6919,742
2,6919,777
2,7295,761
2,7295,771
2,7295,772
Upvotes: 1
Views: 67
Reputation: 37414
# Expand the semicolon-separated list that follows the first two columns:
# print "col1,col2,value" for each single value, and one such line for every
# number covered by a dash range (a range given in either order is counted
# upwards). Columns may be separated by spaces or by tabs.
awk '
BEGIN {
    FS = "([ \t]+|;)"   # input field separator: a run of spaces/tabs, or ;
    OFS = ","           # output field separator is a comma
}
{
    for (i = 3; i <= NF; i++) {     # from the 3rd field to the end
        n = split($i, t, "-")       # number of dash-separated parts, 0 if the field is empty
        if (n) {                    # skip empty fields
            # order the two endpoints once, so the loop always counts upwards
            if (t[1] < t[n]) { lo = t[1]; hi = t[n] }
            else             { lo = t[n]; hi = t[1] }
            for (j = lo; j <= hi; j++)
                print $1, $2, j     # output
        }
    }
}' file
Output
2,2627,588
2,2627,577
2,2629,566
2,2685,564 <─┐
2,2685,565 │
2,2685,566 ├─ wrong order, from smaller to greater
2,2685,567 │
2,2685,568 <─┘
2,2771,573
2,2773,597
2,2779,533
2,2799,558
2,6919,726
2,6919,740
2,6919,741
2,6919,742
2,6919,777
2,7295,761
2,7295,771
2,7295,772
Tested on GNU awk, mawk, Busybox awk and awk version 20121220.
Upvotes: 1
Reputation: 133538
Could you please try the following (I will add an explanation in a few minutes).
# For every line of Input_file, print one "field1,field2,value" line per
# value found in the last field. The last field is a ";"-separated list whose
# entries are either a single value or a dash range; a range is expanded in
# steps of one from its smaller endpoint to its larger endpoint.
awk '
BEGIN{
OFS=","
}
{
num=split($NF,array,";")
for(i=1;i<=num;i++){
if(array[i]~/-/){
split(array[i],array2,"-")
# Order the two endpoints so the loop below always counts upwards,
# whichever way round the range was written.
to=array2[1]>array2[2]?array2[1]:array2[2]
from=array2[1]<array2[2]?array2[1]:array2[2]
while(from<=to){
print $1,$2,from++
}
}
else{
print $1,$2,array[i]
}
from=to=""
}
}
' Input_file
Explanation: a detailed, line-by-line explanation of the above code.
awk ' ## Start of the awk program.
BEGIN{ ## The BEGIN block runs once, before any input line is read.
OFS="," ## Make print join its arguments with a comma.
}
{
num=split($NF,array,";") ## Split the last field on ";" into array; num is the number of pieces.
for(i=1;i<=num;i++){ ## Visit each piece, from the first to the last.
if(array[i]~/-/){ ## A piece that contains a dash is a range such as 568-564.
split(array[i],array2,"-") ## Split the range on "-" into its two endpoints.
to=array2[1]>array2[2]?array2[1]:array2[2] ## to is the larger of the two endpoints.
from=array2[1]<array2[2]?array2[1]:array2[2] ## from is the smaller of the two endpoints, so an inverted range also counts upwards.
while(from<=to){ ## Loop from the smaller endpoint up to and including the larger one.
print $1,$2,from++ ## Print fields 1 and 2 with the current value of from, then increase from by one.
}
}
else{ ## No dash: the piece is a single value.
print $1,$2,array[i] ## Print fields 1 and 2 with that value unchanged.
}
from=to="" ## Reset from and to before handling the next piece.
}
}
' Input_file ## Input_file is the name of the file to read.
Adding a link that explains the conditional operators ? and : as per James sir's comments:
https://www.gnu.org/software/gawk/manual/html_node/Conditional-Exp.html
For the shown sample, the output will be as follows.
2,2627,588
2,2627,577
2,2629,566
2,2685,564
2,2685,565
2,2685,566
2,2685,567
2,2685,568
2,2771,573
2,2773,597
2,2779,533
2,2799,558
2,6919,726
2,6919,740
2,6919,741
2,6919,742
2,6919,777
2,7295,761
2,7295,771
2,7295,772
Upvotes: 2