Reputation: 147
I have a log_file with all my backups and a column with value yes means it won't be deleted by the retention policy (Preserved). there could be 1 or more rows having that preserved column = yes for a specific vmname.
My input is :
= FULL == 20210105 == 2100 == ASR-FULL-20210105-2100 == YES
= FULL == 20210202 == 2100 == ASR-FULL-20210202-2100 == YES
= FULL == 20210302 == 2100 == ASR-FULL-20210302-2100 == YES
= FULL == 20210406 == 2100 == ASR-FULL-20210406-2100 == YES
= FULL == 20210105 == 2146 == DNS10_7-FULL-20210105-2146 == YES
= FULL == 20210202 == 2153 == DNS10_7-FULL-20210202-2153 == YES
= FULL == 20210302 == 2148 == DNS10_7-FULL-20210302-2148 == YES
= FULL == 20210406 == 2122 == DNS10_7-FULL-20210406-2122 == YES
= FULL == 20210105 == 2105 == execnet.0-FULL-20210105-2105 == YES
= FULL == 20210202 == 2106 == execnet.0-FULL-20210202-2106 == YES
= FULL == 20210302 == 2106 == execnet.0-FULL-20210302-2106 == YES
= FULL == 20210406 == 2105 == execnet.0-FULL-20210406-2105 == YES
= FULL == 20210106 == 0200 == Prtgadmin.0-FULL-20210106-0200 == YES
= FULL == 20210105 == 2216 == sandbox.0-FULL-20210105-2216 == YES
= FULL == 20210202 == 2227 == sandbox.0-FULL-20210202-2227 == YES
= FULL == 20210406 == 2152 == sandbox.0-FULL-20210406-2152 == YES
= FULL == 20210105 == 2236 == wwwp.0-FULL-20210105-2236 == YES
= FULL == 20210202 == 2249 == wwwp.0-FULL-20210202-2249 == YES
= FULL == 20210105 == 2259 == wwws.0-FULL-20210105-2259 == YES
= FULL == 20210202 == 2314 == wwws.0-FULL-20210202-2314 == YES
= FULL == 20210105 == 2259 == webhost.0-FULL-20210105-2259 == YES
My desired output is to print the n-1 oldest matches (top n-1)
ASR-FULL-20210105-2100
ASR-FULL-20210202-2100
ASR-FULL-20210302-2100
DNS10_7-FULL-20210105-2146
DNS10_7-FULL-20210202-2153
DNS10_7-FULL-20210302-2148
execnet.0-FULL-20210105-2105
execnet.0-FULL-20210202-2106
execnet.0-FULL-20210302-2106
sandbox.0-FULL-20210105-2216
sandbox.0-FULL-20210202-2227
wwwp.0-FULL-20210105-2236
wwws.0-FULL-20210105-2259
I can so far have the below result by running the below awk commands but It shows the most recent matches instead. I'd also like to have one awk command ideally . The year filter is not that important .
# cat bkp_list.log| grep -E '*2021.*YES'| awk -F[==-] 'cnt[$8]++{if (cnt[$8]>1) print prev=$0;next}' |awk -F[==] '{print $8}'
ASR-FULL-20210202-2100
ASR-FULL-20210302-2100
ASR-FULL-20210406-2100
DNS10_7-FULL-20210202-2153
DNS10_7-FULL-20210302-2148
DNS10_7-FULL-20210406-2122
execnet.0-FULL-20210202-2106
execnet.0-FULL-20210302-2106
execnet.0-FULL-20210406-2105
sandbox.0-FULL-20210202-2227
sandbox.0-FULL-20210406-2152
wwwp.0-FULL-20210202-2249
wwws.0-FULL-20210202-2314
Thank you
Upvotes: 1
Views: 70
Reputation: 203209
With GNU awk for gensub():
$ tac file | awk '$NF=="YES" && seen[gensub(/-.*/,"",1,$8)]++{print $8}' | tac
ASR-FULL-20210105-2100
ASR-FULL-20210202-2100
ASR-FULL-20210302-2100
DNS10_7-FULL-20210105-2146
DNS10_7-FULL-20210202-2153
DNS10_7-FULL-20210302-2148
execnet.0-FULL-20210105-2105
execnet.0-FULL-20210202-2106
execnet.0-FULL-20210302-2106
sandbox.0-FULL-20210105-2216
sandbox.0-FULL-20210202-2227
wwwp.0-FULL-20210105-2236
wwws.0-FULL-20210105-2259
or with any awk:
$ tac file | awk '$NF!="YES"{next} {k=$8; sub(/-.*/,"",k)} seen[k]++{print $8}' | tac
ASR-FULL-20210105-2100
ASR-FULL-20210202-2100
ASR-FULL-20210302-2100
DNS10_7-FULL-20210105-2146
DNS10_7-FULL-20210202-2153
DNS10_7-FULL-20210302-2148
execnet.0-FULL-20210105-2105
execnet.0-FULL-20210202-2106
execnet.0-FULL-20210302-2106
sandbox.0-FULL-20210105-2216
sandbox.0-FULL-20210202-2227
wwwp.0-FULL-20210105-2236
wwws.0-FULL-20210105-2259
Upvotes: 1
Reputation: 784958
To print all but last match of a substring of $8
you may use this awk
:
awk '
$NF != "YES" {next}
{
s = $8
sub(/-FULL-.*/, "", s)
}
s == ps {
print pval
}
{
ps = s
pval = $8
}' file
ASR-FULL-20210105-2100
ASR-FULL-20210202-2100
ASR-FULL-20210302-2100
DNS10_7-FULL-20210105-2146
DNS10_7-FULL-20210202-2153
DNS10_7-FULL-20210302-2148
execnet.0-FULL-20210105-2105
execnet.0-FULL-20210202-2106
execnet.0-FULL-20210302-2106
sandbox.0-FULL-20210105-2216
sandbox.0-FULL-20210202-2227
wwwp.0-FULL-20210105-2236
wwws.0-FULL-20210105-2259
Or one liner:
awk '$NF != "YES"{next} {s=$8; sub(/-FULL-.*/, "", s)} s == ps {print pval} {ps = s; pval=$8}' file
Upvotes: 3
Reputation: 2768
If you want to filter on the column with the YES, you can do it with the contitional expressions before blocks
$ cat file
= FULL == 20210105 == 2100 == ASR-FULL-20210105-2100 == NO
= FULL == 20210202 == 2100 == ASR-FULL-20210202-2100 == YES
= FULL == 20210302 == 2100 == ASR-FULL-20210302-2100 == YES
= FULL == 20210406 == 2100 == ASR-FULL-20210406-2100 == YES
= FULL == 20210105 == 2146 == DNS10_7-FULL-20210105-2146 == YES
= FULL == 20210202 == 2153 == DNS10_7-FULL-20210202-2153 == YES
= FULL == 20210302 == 2148 == DNS10_7-FULL-20210302-2148 == YES
= FULL == 20210406 == 2122 == DNS10_7-FULL-20210406-2122 == YES
= FULL == 20210105 == 2105 == execnet.0-FULL-20210105-2105 == YES
= FULL == 20210202 == 2106 == execnet.0-FULL-20210202-2106 == YES
= FULL == 20210302 == 2106 == execnet.0-FULL-20210302-2106 == YES
= FULL == 20210406 == 2105 == execnet.0-FULL-20210406-2105 == YES
= FULL == 20210106 == 0200 == Prtgadmin.0-FULL-20210106-0200 == YES
= FULL == 20210105 == 2216 == sandbox.0-FULL-20210105-2216 == YES
= FULL == 20210202 == 2227 == sandbox.0-FULL-20210202-2227 == YES
= FULL == 20210406 == 2152 == sandbox.0-FULL-20210406-2152 == YES
= FULL == 20210105 == 2236 == wwwp.0-FULL-20210105-2236 == YES
= FULL == 20210202 == 2249 == wwwp.0-FULL-20210202-2249 == YES
= FULL == 20210105 == 2259 == wwws.0-FULL-20210105-2259 == YES
= FULL == 20210202 == 2314 == wwws.0-FULL-20210202-2314 == YES
= FULL == 20210105 == 2259 == webhost.0-FULL-20210105-2259 == YES
$ awk ' $NF == "YES" { print $(NF-2) }' file
ASR-FULL-20210202-2100
ASR-FULL-20210302-2100
ASR-FULL-20210406-2100
DNS10_7-FULL-20210105-2146
DNS10_7-FULL-20210202-2153
DNS10_7-FULL-20210302-2148
DNS10_7-FULL-20210406-2122
execnet.0-FULL-20210105-2105
execnet.0-FULL-20210202-2106
execnet.0-FULL-20210302-2106
execnet.0-FULL-20210406-2105
Prtgadmin.0-FULL-20210106-0200
sandbox.0-FULL-20210105-2216
sandbox.0-FULL-20210202-2227
sandbox.0-FULL-20210406-2152
wwwp.0-FULL-20210105-2236
wwwp.0-FULL-20210202-2249
wwws.0-FULL-20210105-2259
wwws.0-FULL-20210202-2314
webhost.0-FULL-20210105-2259
$ awk ' $NF == "NO" { print $(NF-2) }' file
ASR-FULL-20210105-2100
$
** note I changed the first line YES to NO to check the correct behaviour
Anyway, if u need to do any other special filtering, like checking the year, please specify
Upvotes: 1