Reputation: 1
I want to transform a file from this format
1;a;34;34;a
1;a;34;23;d
1;a;34;23;v
1;a;4;2;r
1;a;3;2;d
2;f;54;3;f
2;f;34;23;e
2;f;23;5;d
2;f;23;23;g
3;t;26;67;t
3;t;34;45;v
3;t;25;34;h
3;t;34;23;u
3;t;34;34;z
to this format
1;a;34;34;a;34;23;d;34;23;v;4;2;r;3;2;d
2;f;54;3;f;34;23;e;23;5;d;23;23;g;;;
3;t;26;67;t;34;45;v;25;34;h;34;23;u;34;34;z
These are CSV files, so it should be possible with awk or sed, but I have not succeeded so far. If the first value is the same, I want to append the last three values of the line to the first line that has that value. This should continue until the last entry in the file.
Here is some code in awk, but it does not work:
#!/usr/bin/awk -f
# Attempt to append fields 3-5 of every line to the first line that has the
# same first field. It does not produce the wanted output; the comments
# below describe what each rule actually does.

# Fields are separated by a semicolon with optional spaces around it.
BEGIN{ FS = " *; *"}
# Runs for every record: every print is now terminated by ORS instead of a
# newline ("\;" is presumably meant as a plain ";").
{ ORS = "\;" }
# Runs for every record: remember the first field, then print the whole
# input line (followed by ORS).
{
x = $1
print $0
}
# Runs for every record, right after the rule above. x was just assigned
# from $1 of this same record, so the comparison succeeds for every line and
# the else branch is not reached: fields 3-5 are printed again for every
# line, separated by the default OFS (a space), because OFS is never set.
{ if (x == $1)
print $3, $4, $5
else
print "\n"
}
# After the last record: print a newline (followed by ORS).
END{
print "\n"
}
Upvotes: 0
Views: 94
Reputation: 2514
This is going to seem a lot more complicated than the other answers, but it's adding a few things:
Iterating with awk's `for(key in array)` syntax does not guarantee the order in which the keys are visited. To maintain the output order, you can keep track of it as I've done or pipe to sort afterwards.
Having matching numbers of fields in the output appears to be a requirement per the specified output. Without knowing what that number should be, this awk script is built to load all the lines first, compute the maximum number of fields in an output line, then output the lines in order with any adjustments.
#!/usr/bin/awk -f
# Merge all records that share the same first field into one output line:
#   field1;field2;<fields 3-5 of every record with that first field>...
# Every output line is padded with empty trailing fields so that all lines
# have the same number of fields. Lines are printed in the order in which
# their keys first appear in the input.
BEGIN { FS = OFS = ";" }
{
    key = $1
    # for (k in array) does not guarantee any order, so keep an explicit
    # list of keys. A key is recorded only the first time it is seen, so a
    # key that reappears later in the input is not queued (and printed) twice.
    if (!(key in nf_a)) {
        order[++key_cnt] = key
        # start the output line for this key with its first two fields
        a[key] = $1 OFS $2
    }
    # append this record's value fields to the line built so far
    a[key] = a[key] OFS $3 OFS $4 OFS $5
    # count the value fields collected for this key
    nf_a[key] += 3
}
END {
    # largest number of value fields collected for any key
    for (k in nf_a) {
        if (nf_a[k] > nf_max) {
            nf_max = nf_a[k]
        }
    }
    for (i = 1; i <= key_cnt; i++) {
        key = order[i]
        # one OFS per missing field yields the empty trailing fields
        blank_flds = ""
        for (n = nf_a[key]; n < nf_max; n++) {
            blank_flds = blank_flds OFS
        }
        # output the built-up line plus its padding, in first-seen order
        print a[key] blank_flds
    }
}
If the desired number of fields in the output lines is known ahead of time, simply appending the blank fields on key switch without all these arrays would work and make a simpler script.
I get the following output:
1;a;34;34;a;34;23;d;34;23;v;4;2;r;3;2;d
2;f;54;3;f;34;23;e;23;5;d;23;23;g;;;
3;t;26;67;t;34;45;v;25;34;h;34;23;u;34;34;z
Upvotes: 0
Reputation: 77145
You got good answers in `awk`. Here is one in `perl`:
perl -F';' -lane'
    # Group key: the first two fields of the line
    my $id = join ";", @F[0..1];
    # Note each key once, in the order it first appears
    push @order, $id unless $count{$id}++;
    # Collect every remaining field of the line under its key
    push @{ $rows{$id} }, @F[2..$#F];
    # After the last line: one output line per key, fields joined by ";"
    END {
        print join ";", $_, @{ $rows{$_} } for @order;
    }' file
Upvotes: 1
Reputation: 204074
$ cat tst.awk
# Join consecutive lines that share their first two fields: the first line
# of a group is printed whole, and every following line of the group
# contributes only what comes after its two key fields.
BEGIN { FS=OFS=";" }
# key of the current line: its first two fields
{ curr = $1 FS $2 }
curr == prev {
    # Same group as the previous line: strip the two key fields. What is
    # left still starts with ";", which separates it from the text already
    # printed on this output line.
    sub(/^[^;]*;[^;]*/,"")
    printf "%s", $0
    next
}
{
    # New group: terminate the previous output line (there is none before
    # the first record) and start a new one with the whole input line.
    printf "%s%s", (NR>1?ORS:""), $0
    prev = curr
}
# Terminate the last output line; with no input there is no line to
# terminate, so nothing is printed.
END { if (NR) print "" }
$ awk -f tst.awk file
1;a;34;34;a;34;23;d;34;23;v;4;2;r;3;2;d
2;f;54;3;f;34;23;e;23;5;d;23;23;g
3;t;26;67;t;34;45;v;25;34;h;34;23;u;34;34;z
Upvotes: 2
Reputation: 44063
If I understand you correctly that you want to build a line from fields 3-5 of all lines with the same first two fields (preceded by those two fields), then
# For each run of lines sharing fields 1 and 2, print one line: the two key fields followed by fields 3-5 of every line in the run.
awk -F \; 'key != $1 FS $2 { if(NR != 1) print line; key = $1 FS $2; line = key } { line = line FS $3 FS $4 FS $5 } END { print line }' filename
That is
# Builds one output line per run of input lines that share their first two
# fields: the two key fields followed by fields 3-5 of every line in the run.
key != $1 FS $2 { # if the key (first two fields) changed
if(NR != 1) print line; # print the finished line (except at the very
# beginning, where nothing has been built yet and
# an empty line would be printed)
key = $1 FS $2 # remember the new key
line = key # and start building the next line with it
}
{
line = line FS $3 FS $4 FS $5 # append the value fields of every line
}
END { # and at the very end,
print line # print the last line (the first block only prints
} # a line once the following key has been seen)
Upvotes: 1