JohnnnnnY
JohnnnnnY

Reputation: 35

awk distance matrix

I'd like some help with a task, but I don't really know how to approach it. I have to make a distance matrix from a three-column text file that looks like this:

AN51 AN50 88
AN52 AN50 167
AN52 AN51 125
AN53 AN50 81
AN53 AN51 93
AN53 AN52 170
AN54 AN50 120
AN54 AN51 119
AN54 AN52 117
AN54 AN53 66 

The output has to be a square-shaped 2D matrix:

    AN50  AN51  AN52 AN53
AN50 0     88   167   81
AN51 88    0    125   93
AN52 167   125   0    170
AN53 81    93   170    0

I tried reading the file in with $0, and it eventually read all the fields, but with a different alignment.

Upvotes: 1

Views: 362

Answers (2)

Dennis Williamson
Dennis Williamson

Reputation: 360345

This is a much more general version of Michael Barber's answer which will work, generally, with any number of columns and rows.

awk '
# Build a symmetric distance matrix from "name1 name2 distance" records
# read on standard input (or from files given after the program) and print
# it as a tab-separated table with sorted row and column labels.
# Uses asort(), which is a GNU awk extension.
BEGIN {
    OFS = "\t"
}
# Only records with at least three fields are used. Blank or short lines
# would otherwise register an empty name and add a bogus row and column.
NF >= 3 {
    # Store the distance under both key orders so lookups are symmetric.
    matrix[$1,$2] = $3
    matrix[$2,$1] = $3
    # Indexing by name de-duplicates; the value is what asort() sorts.
    names[$1] = $1
    names[$2] = $2
}
END {
    # asort() sorts the values and re-indexes names as 1..num.
    num = asort(names)
    # Header row: a leading separator leaves the corner cell empty.
    for (i = 1; i <= num; i++) {
        printf("%s%s", OFS, names[i])
    }
    printf("\n")
    for (i = 1; i <= num; i++) {
        printf("%s", names[i])
        for (j = 1; j <= num; j++) {
            # Pairs never seen in the input (including the diagonal) are
            # uninitialized and are printed as 0 by %d.
            printf("%s%4d", OFS, matrix[names[i], names[j]])
        }
        printf("\n")
    }
}'

Example output:

        AN50    AN51    AN52    AN53    AN54
AN50       0      88     167      81     120
AN51      88       0     125      93     119
AN52     167     125       0     170     117
AN53      81      93     170       0      66
AN54     120     119     117      66       0

Note that your sample input data yields the output I've shown which includes the complete data in it. Note also that Michael's answer only outputs what your sample output includes which is incomplete.

Edit:

Here's a version that doesn't require asort() and should work on non-GNU versions of AWK:

awk '
# Build a symmetric distance matrix from "name1 name2 distance" records
# read on standard input (or from files given after the program) and print
# it as a tab-separated table. No asort() is used here.
BEGIN {
    OFS = "\t"
}
# Only records with at least three fields are used. Blank or short lines
# would otherwise register an empty name and add a bogus row and column.
NF >= 3 {
    # Store the distance under both key orders so lookups are symmetric.
    matrix[$1,$2] = $3
    matrix[$2,$1] = $3
    # Indexing by name de-duplicates the labels.
    names[$1] = $1
    names[$2] = $2
}
END {
    # The traversal order of "for (i in names)" is not specified by awk,
    # so labels come out in an arbitrary order.
    # NOTE(review): names is not modified between the loops below, so the
    # header and the rows are expected to share one order - confirm for
    # the awk implementation in use.
    for (i in names) {
        printf("%s%s", OFS, i)
    }
    printf("\n")
    for (i in names) {
        printf("%s", i)
        for (j in names) {
            # Pairs never seen in the input (including the diagonal) are
            # uninitialized and are printed as 0 by %d.
            printf("%s%4d", OFS, matrix[i,j])
        }
        printf("\n")
    }
}'

It will print the names in an unpredictable order.

Upvotes: 3

Michael J. Barber
Michael J. Barber

Reputation: 25052

awk '
# Record each distance under both key orders so lookups are symmetric.
{
  dist[$1, $2] = $3
  dist[$2, $1] = $3
}

# Print the fixed 4x4 table for AN50 through AN53, tab-separated.
# Every cell, including the last one in a row, is followed by a tab.
END {
  printf("\tAN50\tAN51\tAN52\tAN53\n")
  row = 0
  while (row < 4) {
    printf("AN5%d\t", row)
    for (col = 0; col < 4; col++)
      printf("%d\t", dist["AN5" row, "AN5" col])
    printf("\n")
    row++
  }
}'

Upvotes: 3

Related Questions