Reputation: 107
I have two files as shown below. file1 has two columns and file2 has varying number of columns depending on the row. I want to compare both files and if $1 and $2 both of file1 is in a row of file2, I want to delete that row. Also file2 is comma separated. How can this be done using awk? or any other text processing tools?
file1
5052 5051
4952 4951
file2
2001, 5052, 7001, 5051, 1000
2002, 5052, 7001, 1500, 2500
2003, 5051, 3500, 4500, 4952
2004, 4952, 4999, 4500, 4951
Expected output:
2002, 5052, 7001, 1500, 2500
2003, 5051, 3500, 4500, 4952
I have tried below awk code but did not run.
awk 'NR==FNR{A[$1]=$1;A[$2]=$2; next} {if ($0=A[$1] && $0=A[$2]){next} else {print $0}' file1 file2 >> test.inp
Upvotes: 1
Views: 306
Reputation: 203219
This assumes the pairs in file1 never have the same value in both fields:
$ cat tst.awk
NR==FNR {
pairs1[NR] = $1
pairs2[NR] = $2
next
}
{
orig = $0
gsub(/[[:space:],]+/," ")
delete vals
for (i=1; i<=NF; i++) {
vals[$i]
}
for (nr in pairs1) {
if ( (pairs1[nr] in vals) && (pairs2[nr] in vals) ) {
next
}
}
print orig
}
$ awk -f tst.awk file1 file2
2002, 5052, 7001, 1500, 2500
2003, 5051, 3500, 4500, 4952
Upvotes: 2
Reputation: 22012
Would you please try the following:
awk 'NR==FNR {
a[$1] = $2 # save $1 as key, and $2 as value
next # skip the following codes for file1
}
{
len = split(gensub("^ +", "", 1), ary, " *, *")
# remove leading spaces and split on commas,
# then assign ary to the field values of file2
matched = 0 # reset the flag
for (i = 1; i <= len; i++) { # loop over the fields of file2
if (ary[i] in a) { # if $1 in file1 is found
for (j = 1; j <= len; j++) { # then go into a deeper loop to search $2 in file1
if (a[ary[i]] == ary[j]) { # if $2 is also found in the same record
matched = 1 # then set the flag
break # and exit from the inner loop
}
}
}
}
if (!matched) print # print the line unless "matched"
}' file1 file2
The result for the provided example is all records except the line:
4651, 4651, 4652, 4752, 4751
Upvotes: 0
Reputation: 12867
awk 'NR==FNR { map[$1]="1";map1[$2]="1";next } {lin=gensub(" ","","g",$0);split(lin,map3,",");ok=1;for (i in map3) { if (map1[map3[i]]==1 || map[map3[i]]==1 ) { ok=0 } } if (ok==1) { print $0 } }' file1 file2
Explanation:
awk 'NR==FNR { # Process the first file
map[$1]=""; # Set up two arrays, one for
the first space delimited
field and the other for
the second
map1[$2]="";
next # Skip to the next record
}
{
lin=gensub(" ","","g",$0); # Process the second file and remove
all spaces from the the line
putting the result in a variable
lin
split(lin,map3,","); # Split the variable lin into the
array map3 based on commas as the
separator
ok=1; # Initialise a variable
for (i in map3) {
if (map1[map3[i]] || map[map3[i]]) {
ok=0 # Loop through each entry in the map3
array (on the line) and check if it
exists in map1 or map. If it does
exist, set ok to 0
}
}
if (ok==1) {
print $0 # Only if the variable ok is 1,
print
}
}' file1 file2
Upvotes: 0