Reputation: 1345
I am trying to merge rows with a matching first cell in a CSV file. So that the following cells are placed within their rightful columns based on matching strings.
I have a file with the following contents:
item,pieces,color,last order
"apples","4 pieces"
"apples","red color"
"apples","last ordered 2 hours ago"
"mangos","1 piece"
"mangos","last ordered 1 day ago"
"carrots","10 pieces"
"carrots","orange color"
Which then should be merged into the following:
item,pieces,color,last order
"apples","4 pieces","red color","last ordered 2 hours ago"
"mangos","1 piece","","last ordered 1 day ago"
"carrots","10 pieces","orange color",""
The code I have used for this:
awk '{ printf "%s", $0; if (NR % 3 == 0) print ""; else printf "," }' file.csv
This method of merging three rows at a time worked with a little manual editing for as long as all items had the three pieces of data "pieces", "color" & "last order".
However, this is not working as different items have different sets of data.
Upvotes: 0
Views: 114
Reputation: 1871
The following implementation works so long as all our items are grouped together in blocks of lines within the input file. The implementation caches fields for a single output record and print
s that when it sees a different item (or alternatively when the script END
's).
awk -F, -v OFS=, '
NR==1 { f1=$1; f2=$2; f3=$3; f4=$4 }
FNR==1 { next }
f1 != $1 {
print f1, f2, f3, f4
f1=$1
f2=f3=f4="\"\""
}
$2 ~ /piece/ { f2 = $2; next }
$2 ~ /color/ { f3 = $2; next }
$2 ~ /last order/ { f4 = $2; next }
END { print f1, f2, f3, f4 }
' file.csv
If items are not grouped, we have to cache the whole file(s)... In the following, f
basically stores/represents the file as a table. The k
array stores the the line number for the output record of a given item so that when we print
, our output records will come out in the input file's order.
awk -F, -v OFS=, '
BEGIN { n = 0; split("", k); split("", f) }
NR==1 { f[++n,1]=$1; f[n,2]=$2; f[n,3]=$3; f4[n,4]=$4 }
FNR==1 { next }
!($1 in k) {
k[$1] = ++n
f[n,1]=$1
f[n,2]=f[n,3]=f[n,4]="\"\""
}
$2 ~ /piece/ { f[k[$1],2] = $2; next }
$2 ~ /color/ { f[k[$1],3] = $2; next }
$2 ~ /last order/ { f[k[$1],4] = $2; next }
END {
for (i=1; i<=n; ++i)
print f[i,1], f[i,2], f[i,3], f[i,4]
}
' file.csv
Notes:
The NR/FNR dance in both implementations is an attempt at handling the header line and multiple input files (and we may or may not care about handling multiple file input).
The latter implementation permits overwriting of an item's fields... If we don't want that behavior, we would have to change the logic -- the changes to the logic would be about the lines that have the regex comparisons on input field $2
.
Upvotes: 0
Reputation: 203607
$ cat tst.awk
BEGIN { FS=OFS="," }
{ gsub(/"/,"") }
NR==1 {
print
sub(/s,/,",")
numTags = split($0,tags)
next
}
$1 != prev {
if ( prev != "" ) {
prt()
}
prev=$1
}
{
tag = tags[1]
tag2val[tag] = $1
for (tagNr=2; tagNr<=numTags; tagNr++) {
tag = tags[tagNr]
if ( index($2,tag) ) {
tag2val[tag] = $2
next
}
}
}
END { prt() }
function prt( tagNr,tag,val) {
for (tagNr=1; tagNr<=numTags; tagNr++) {
tag = tags[tagNr]
val = tag2val[tag]
printf "\"%s\"%s", val, (tagNr<numTags ? OFS : ORS)
}
delete tag2val
}
$ awk -f tst.awk file
item,pieces,color,last order
"apples","4 pieces","red color","last ordered 2 hours ago"
"mangos","1 piece","","last ordered 1 day ago"
"carrots","10 pieces","orange color",""
Upvotes: 2
Reputation: 785186
You may try this awk
:
awk 'BEGIN {FS=OFS=","} NR == 1 {print; next} item != $1 {if (item != "") print item, pieces, color, order; item = $1; pieces = $2; color = order = "\"\""; next} {if ($2 ~ /color/) color = $2; else order = $2} END {print item, pieces, color, order}' file
item,pieces,color,last order
"apples","4 pieces","red color","last ordered 2 hours ago"
"mangos","1 piece","","last ordered 1 day ago"
"carrots","10 pieces","orange color",""
A more readable version:
awk 'BEGIN {
FS = OFS = ","
}
NR == 1 {
print
next
}
item != $1 {
if (item != "")
print item, pieces, color, order
item = $1
pieces = $2
color = order = "\"\""
next
}
{
if ($2 ~ /color/)
color = $2
else
order = $2
}
END {
print item, pieces, color, order
}' file
Upvotes: 2
Reputation: 12728
check this out :
awk -F, '{ if (f == $1) { for (c=0; c <length($1) + length(FS); c++) printf " "; print $2 FS $3 } else { print $0 } } { f = $1 }' yourfile.csv
Upvotes: -1