masterbay
masterbay

Reputation: 23

Awk command to compare specific columns in file1 to file2 and display output

File1

111,222,560,0.7
111,333,560,0.2
111,444,560,0.1

File2

2017,111,560,0.0537
2018,111,560,0.0296
2019,111,560,0.0624

Desired output:

2017,111,560,0.0537,222,0.7
2018,111,560,0.0296,222,0.7
2019,111,560,0.0624,222,0.7
2017,111,560,0.0537,333,0.2
2018,111,560,0.0296,333,0.2
2019,111,560,0.0624,333,0.2
2017,111,560,0.0537,444,0.1
2018,111,560,0.0296,444,0.1
2019,111,560,0.0296,444,0.1

I tried awk NR==FNR command but it’s displaying only the last matched...

reads every line and check if column 1 and 3 of file1 exists in file2:

2017,111,560,0.0537,444,0.1
2018,111,560,0.0296,444,0.1
2019,111,560,0.0296,444,0.1

Upvotes: 0

Views: 136

Answers (3)

RavinderSingh13
RavinderSingh13

Reputation: 133750

Following awk may help you in same.

awk -F, '
FNR==NR{
  a[FNR]=$0;
  next
}
{
  for(i=1;i<=length(a);i++){
    print a[i] FS $2 FS $NF
}
}'   Input_file2  Input_file1

Adding explanation too for code as follows.

awk -F, '                   ##Setting field separator as comma here for all the lines.
FNR==NR{                    ##Using FNR==NR condition which will be only TRUE then first Input_file named File2 is being read.
                            ##FNR and NR both indicates the number of lines for a Input_file only difference is FNR value will be RESET whenever a new file is being read and NR value will be keep increasing till all Input_files are read.
  a[FNR]=$0;                ##Creating an array named a whose index is FNR(current line) value and its value is current line value.
  next                      ##Using next statement will sip all further statements now.
}
{
  for(i=1;i<=length(a);i++){##Starting a for loop from variable i value from 1 to length of array a value. This will be executed on 2nd Input_file reading.
    print a[i] FS $2 FS $NF ##Printing the value of array a whose index is variable i and printing 2nd and last field of current line.
}
}' File2 File1              ##Mentioning the Input_file names here.

Upvotes: 0

karakfa
karakfa

Reputation: 67557

another one with join/awk

$ join -t, -j99 file2 file1 | 
  awk -F, -v OFS=, '$3==$6 && $4==$8 {print $2,$3,$4,$5,$7,$9}'

Upvotes: 0

Akshay Hegde
Akshay Hegde

Reputation: 16997

I tried awk NR==FNR command but it’s displaying only the last matched...

reads every line and check if column 1 and 3 of file1 exists in file2:

Using awk and sort

awk 'BEGIN{
           # set input and output field separator
           FS=OFS=","               
     }
     # read first file f1
     # index key field1 and field3 of file1 (f1)
     {
         k=$1 FS $3
     }

     # save 2nd and last field of file1 (f1) in array a, key being k
     FNR==NR{
         a[k]=(k in a ? a[k] RS:"") $2 OFS $NF; 

         # stop processing go to next line
         next
     }

     # read 2nd file f2 from here 
     # 2nd and 3rd field of fiel2 (f2) used as key
     {
         k=$2 FS $3
     }

     # if key exists in array a
     k in a{
         # split array value by RS row separator, and put it in array t
         split(a[k],t,RS); 

         # iterate array t, print and sort
         for(i=1; i in t; i++)
              print $0,t[i] | "sort -t, -nk5" 
     }
     ' f1 f2

Test Results:

$ cat f1
111,222,560,0.7
111,333,560,0.2
111,444,560,0.1

$ cat f2
2017,111,560,0.0537
2018,111,560,0.0296
2019,111,560,0.0624

$ awk 'BEGIN{FS=OFS=","}{k=$1 FS $3}FNR==NR{a[k]=(k in a ? a[k] RS:"") $2 OFS $NF; next}{k=$2 FS $3}k in a{split(a[k],t,RS); for(i=1; i in t; i++)print $0,t[i] | "sort -t, -nk5" }' f1 f2
2017,111,560,0.0537,222,0.7
2018,111,560,0.0296,222,0.7
2019,111,560,0.0624,222,0.7
2017,111,560,0.0537,333,0.2
2018,111,560,0.0296,333,0.2
2019,111,560,0.0624,333,0.2
2017,111,560,0.0537,444,0.1
2018,111,560,0.0296,444,0.1
2019,111,560,0.0624,444,0.1

Upvotes: 1

Related Questions