Reputation: 67
I am working with a CSV in bash, and attempting to merge the data in the 2nd column by matched data in the 3rd column.
My code works but the information in the other columns ends up just getting repeated instead of properly copied.
awk -F',' -v OFS=',' '{
env_name=$1
app_name=$4
lob_name=$5
if ($3 in a) {
a[$3] = a[$3]" "$2;
} else {
a[$3] = $2;
}
}
END { for (i in a) print env_name, i, a[i], app_name, lob_name}' input.tmp > output.tmp
This:
A,1,B,C,D
A,2,B,C,D
A,3,E,F,G
A,4,X,Y,Z
A,5,E,F,G
Should become this:
A,1 2,B,C,D
A,3 5,E,F,G
A,4,X,Y,Z
But instead we are getting this:
A,1 2,B,C,D
A,3 5,E,C,D
A,4,X,C,D
Upvotes: 1
Views: 129
Reputation: 67567
your grouping key should be all except second field
$ awk -F, 'BEGIN {SUPSEP=OFS=FS}
{k=$1 FS $3 FS $4 FS $5; a[k]=(k in a)?a[k]" "$2:$2}
END {for(k in a) {split(k,p); print p[1],a[k],p[2],p[3],p[4]}}' file
A,1 2,B,C,D
A,3 5,E,F,G
A,4,X,Y,Z
perhaps can be simplified a bit
$ awk 'BEGIN {OFS=FS=","}
{v=$2; $2=""; k=$0; a[k]=(k in a?a[k]" "v:v)}
END {for(k in a) {$0=k; $2=a[k]; print}}' file
Upvotes: 2
Reputation: 8791
sed + sort + awk
$ sed 's/,/+/3;s/,/+/3' merge_csv | sort -t, -k3 | awk -F, -v OFS=, ' { if($3==p) { a=a b " "; } if(p!=$3 && NR>1) { print $1,a b,p; a="" } b=$2; p=$3 } END { print $1,a b,p } ' | tr '+' ','
A,1 2,B,C,D
A,3 5,E,F,G
A,4,X,Y,Z
$
If Perl is an option, you can try this
$ perl -F, -lane '$x=join(",",@F[-3,-2,-1]); @t=@{$kv{$x}};push(@t,$F[1]);$kv{$x}=[@t]; END { for(keys %kv) { print "A,",join(" ",@{$kv{$_}}),",$_" }} ' merge_csv
A,1 2,B,C,D
A,4,X,Y,Z
A,3 5,E,F,G
$
Input file:
$ cat merge_csv
A,1,B,C,D
A,2,B,C,D
A,3,E,F,G
A,4,X,Y,Z
A,5,E,F,G
$
Upvotes: 1