user3665852
user3665852

Reputation: 67

How do I use awk/sed to merge a field across multiple rows based on matching column values?

I am working with a CSV in bash, and attempting to merge the data in the 2nd column by matched data in the 3rd column.

My code works but the information in the other columns ends up just getting repeated instead of properly copied.

awk -F',' -v OFS=',' '{
            env_name=$1
            app_name=$4
            lob_name=$5
            if ($3 in a) {
                a[$3] = a[$3]" "$2;
            } else {
                a[$3] = $2;
            }
        }
        END { for (i in a) print env_name, i, a[i], app_name, lob_name}' input.tmp > output.tmp
This:

A,1,B,C,D
A,2,B,C,D
A,3,E,F,G
A,4,X,Y,Z
A,5,E,F,G

Should become this:

A,1 2,B,C,D
A,3 5,E,F,G
A,4,X,Y,Z

But instead we are getting this:

A,1 2,B,C,D
A,3 5,E,C,D
A,4,X,C,D

Upvotes: 1

Views: 129

Answers (2)

karakfa
karakfa

Reputation: 67567

your grouping key should be all except second field

$ awk -F, 'BEGIN {SUPSEP=OFS=FS} 
                 {k=$1 FS $3 FS $4 FS $5; a[k]=(k in a)?a[k]" "$2:$2} 
           END   {for(k in a) {split(k,p); print p[1],a[k],p[2],p[3],p[4]}}' file

A,1 2,B,C,D
A,3 5,E,F,G
A,4,X,Y,Z

perhaps can be simplified a bit

$ awk 'BEGIN {OFS=FS=","} 
             {v=$2; $2=""; k=$0; a[k]=(k in a?a[k]" "v:v)}
       END   {for(k in a) {$0=k; $2=a[k]; print}}' file

Upvotes: 2

stack0114106
stack0114106

Reputation: 8791

sed + sort + awk

$ sed 's/,/+/3;s/,/+/3' merge_csv | sort -t, -k3 | awk -F, -v OFS=, ' { if($3==p) { a=a b " "; } if(p!=$3 && NR>1) { print $1,a b,p; a="" } b=$2; p=$3 } END { print $1,a b,p } ' | tr '+' ','
A,1 2,B,C,D
A,3 5,E,F,G
A,4,X,Y,Z

$

If Perl is an option, you can try this

$ perl -F, -lane '$x=join(",",@F[-3,-2,-1]); @t=@{$kv{$x}};push(@t,$F[1]);$kv{$x}=[@t]; END { for(keys %kv) { print "A,",join(" ",@{$kv{$_}}),",$_" }} ' merge_csv
A,1 2,B,C,D
A,4,X,Y,Z
A,3 5,E,F,G

$

Input file:

$ cat merge_csv
A,1,B,C,D
A,2,B,C,D
A,3,E,F,G
A,4,X,Y,Z
A,5,E,F,G

$

Upvotes: 1

Related Questions