Ashi
Ashi

Reputation: 389

Column entry to header

I am having a file with following format; where column 3 is the id (condition), column 2 is sample and column 1 the number. The data is having 4 uniq Id/condition (A, B, C, U). I want to convert the data in a format where Ids (A, B, C, U) become header and have their corresponding numbers under it.

Input file

No      Sample  Id
131     E0233   A
202     E0233   B
326     E0233   C
419     E0233   U
28      E0412   A
42      E0412   B
79      E0412   C
95      E0412   U
8       E0442   A
10      E0442   B
8       E0442   C
19      E0442   U
72      E1008   A
67      E1008   B
176     E1008   C
169     E1008   U
8       E2125   A
11      E2125   B
13      E2125   C
24      E2125   U
9       E2165   A
27      E2165   B
25      E2165   C
35      E2165   U
10      E2501   A
19      E2501   B
53      E2501   C
40      E2501   U
119     E2596   A
166     E2596   B
393     E2596   C
342     E2596   U
1       E2829   B
93      E3100   A
167     E3100   B
297     E3100   C
301     E3100   U
3       E4982   A
1       E4982   B

Output File

Sample  A   B   C   U
E0233   131 202 326 419
E0412   28  42  79  95
E0442   8   10  8   19
E1008   72  67  176 169
E2125   8   11  13  24
E2165   9   27  25  35
E2501   10  19  53  40
E2596   119 166 393 342
E2829   0   1   0   0
E3100   93  167 297 301
E4982   3   1   0   0

Since I just received a comment that SO is not about asking; I just wanted to add that though I asked this question here I am also trying to go through previous solutions suggested in SO so that I can solve it. The latest one I am trying use is (Transpose in Unix). I myself also trying; but definitely my knowledge is limited. Thanks

Upvotes: 1

Views: 113

Answers (1)

RavinderSingh13
RavinderSingh13

Reputation: 133458

Could you please try following, written and tested with shown samples in GNU awk.

awk '
FNR>1{
  sample[$2]
  value[$2,$3]=$1
  if(!a[$3]++){
    b[++count]=$3
  }
}
END{
  printf("%s ","sample")
  for(i=1;i<=count;i++){
    printf("%s%s",b[i],(i==count?ORS:OFS))
  }
  for(k in sample){
    printf("%s ",k)
    for(i=1;i<=count;i++){
       printf("%d %s",(value[k,b[i]]!=""?value[k,b[i]]:""),(i==count?ORS:OFS))
    }
  }
}' Input_file

Explanation: Adding detailed explanation for above.

awk '                                        ##Starting awk program from here.
FNR>1{                                       ##Checking condition if line number is greater than 1 then do following.
  sample[$2]                                 ##Creating sample array with index of 2nd column.
  value[$2,$3]=$1                            ##Creating value with index of 2nd and 3rd column and value is 1st field.
  if(!a[$3]++){                              ##Checking condition if 3rd field is found in a first time then do following.
    b[++count]=$3                            ##Creating b with index of count increasing with 1 each time cursor comes here with value of 3rd field in it.
  }
}
END{                                         ##Starting END block of this awk program. from here.
  printf("%s ","sample")                     ##Printing sample text as a header before printing anything here.
  for(i=1;i<=count;i++){                     ##Running a for loop till value of count here.
    printf("%s%s",b[i],(i==count?ORS:OFS))   ##Printing all header values with spaces and printing new line at last.
  }
  for(k in sample){                          ##Traversing through sample array here.
    printf("%s ",k)                          ##printing value of k value.
    for(i=1;i<=count;i++){                   ##Running a for loop till value of count here.
       printf("%d %s",(value[k,b[i]]!=""?value[k,b[i]]:""),(i==count?ORS:OFS))  ##Printing value of array value with index of k and value of b[i] and printing new line at end of loop and spaces for each element.
    }
  }
}' Input_file                                ##Mentioning Input_file name here.

Upvotes: 3

Related Questions