Reputation: 2077
I have 10 files. For example:
$ cat ifile01.txt
1 0.22
2 0.01
4 0.32
5 0.10
. .
$ cat ifile02.txt
1 0.23
2 0.11
3 0.12
4 0.20
. .
$ cat ifile03.txt
1 0.32
2 0.64
3 0.12
5 0.90
. .
and so on for 10 files
Here 1st column is the serial number and 2nd column is their index
I would like to rearrange them in the following way in one file:
outfile.txt
0.22 0.01 ? 0.32 0.10 (Transpose of the index from ifile01.txt with "?" for serial number 3, as the index for 3 is missing)
0.23 0.11 0.12 0.20 ? (Transpose of the index from ifile02.txt with "?" for serial number 5, as the index for 5 is missing)
0.32 0.64 0.12 ? 0.90 (Transpose of the index from ifile03.txt with "?" for serial number 4, as the index for 4 is missing)
I was trying it the following way in Fortran, but I am looking for an awk script.
For each ifile.txt, I check the following:
for i in {1..50};do
if [ $i != $1 ]; then i="?"
Then append print transpose of $2 for each ifile.txt in outfile.txt
Upvotes: 1
Views: 98
Reputation: 133680
Could you please try the following.
awk '
# Emit one row per input file. Each column corresponds to one distinct
# serial number (column 1 of the inputs), in ascending numeric order, and
# holds the value that file has for the serial, or "?" when it has none.
# Requires GNU awk (asort with a named comparison).
FNR==1{
  count++                      # first line of a file: start a new row
}
{
  a[count,$1]=$2               # value keyed by (row number, serial number)
  if(!c[$1]++){
    d[++occ]=$1                # record each distinct serial number once
  }
}
END{
  # Sort the stored serial numbers themselves (the VALUES of d), numerically.
  # asorti() would sort the indices 1..occ as strings instead, which only
  # looks right while the serials happen to be exactly 1..9.
  asort(d,e,"@val_num_asc")
  for(i=1;i<=count;i++){
    for(k=1;k<=occ;k++){
      # Test key membership rather than truthiness, so a stored 0 or 0.00
      # is printed as data instead of being mistaken for a missing entry.
      printf("%s ",((i,e[k]) in a)?a[i,e[k]]:"?")
    }
    print ""
  }
}
' Input_file1 Input_file2 Input_file3 | column -t
Output will be as follows.
0.22 0.01 ? 0.32 0.10
0.23 0.11 0.12 0.20 ?
0.32 0.64 0.12 ? 0.90
Explanation: Adding explanation for above code.
awk '                                           ##Starting awk program from here (GNU awk is required for asort with a named comparison).
FNR==1{                                         ##Checking condition if this is first line of an Input_file.
  count++                                       ##Incrementing count by 1, so count is the number of the current file (the output row).
}
{
  a[count,$1]=$2                                ##Storing $2 in array a under the key (count, $1), i.e. (file number, serial number).
  if(!c[$1]++){                                 ##Checking condition if $1 has NOT been seen before in array c, then do following.
    d[++occ]=$1                                 ##Storing the new serial number $1 in array d at index occ (occ counts distinct serial numbers).
  }                                             ##Closing BLOCK for if condition.
}                                               ##Closing main BLOCK.
END{                                            ##Starting END block for this awk program here.
  asort(d,e,"@val_num_asc")                     ##Sorting the VALUES of d (the serial numbers) numerically into e; asorti would sort the indices 1..occ as strings instead.
  for(i=1;i<=count;i++){                        ##Starting a for loop from i=1 till value of count (number of files actually).
    for(k=1;k<=occ;k++){                        ##Starting a for loop from k=1 till value of occ (number of distinct serial numbers).
      printf("%s ",((i,e[k]) in a)?a[i,e[k]]:"?")   ##Printing a[i,e[k]] if that key exists, else ? as per OP; the in test keeps a real 0 or 0.00 value from being printed as ?.
    }                                           ##Closing BLOCK for inner for loop here.
    print ""                                    ##Printing an empty string to end the current output line.
  }                                             ##Closing BLOCK for outer for loop here.
}                                               ##Closing BLOCK for END block of this awk program here.
' file1 file2 file3 | column -t                 ##Mentioning Input_file names here and using column -t to put equal spacing in their output.
As per @jhnc's nice comments, adding his suggested tweaked solution here too.
awk '
# Emit one row per input file with columns 1..m, where m is the largest
# serial number (column 1) seen in any file. A serial number that a file
# does not contain is shown as "?".
FNR==1{
  f++                          # first line of a file: f is the current row
}
{
  a[f,$1]=$2                   # value keyed by (row number, serial number)
}
m<$1{
  m=$1                         # track the largest serial number seen so far
}
END{
  for(i=1;i<=f;i++){
    for(j=1;j<=m;j++){
      # Membership test instead of truthiness: a stored 0 or 0.00 is real
      # data and must not be replaced by "?". Fields are separated by OFS
      # and the last field of each row is followed by ORS.
      printf "%s%s", (((i,j) in a)?a[i,j]:"?"), (j<m?OFS:ORS)
    }
  }
}' file1 file2 file3
Upvotes: 4
Reputation: 16817
Assuming serial numbers appear in the file in ascending order:
# specify the maximum or calculate by pre-pass
# The inner awk scans every input once and prints the largest serial number;
# the command substitution is quoted so the result reaches -v as one word.
awk -v cols="$(awk 'm<$1{m=$1} END{print m}' ifile*.txt)" '
  # Finish and print the row collected for the file that was just read.
  # Uses the globals a (current row), i (last filled column) and cols.
  function p() {
    while (i++<cols) a[i]="?"                                     # post-pad
    for (i=1;i<=cols;i++) printf "%s%s", a[i], (i<cols?OFS:ORS)   # print a row
    i=0                                                           # initialise for next row
  }
  # First line of every file except the very first: flush the previous row.
  FNR==1 && FNR!=NR { p() }
  { while (++i<$1) a[i]="?" }   # pad missing serial numbers
  { a[i]=$2 }                   # store an index
  END { p() }                   # flush the row of the last file
' ifile*.txt >outfile.txt
You could format the output to match your sample by changing the print line to, for example:
for (i=1;i<=cols;i++) printf "%4s%s", a[i], (i<cols?" ":ORS) # %4s right-aligns each value in a 4-character field; columns are separated by one space, rows end with ORS
Upvotes: 3