Reputation: 75
We have some | delimited input CSV files. Our requirement is if last field value contains 9999-12-31 then we need to subtract 3 days from 2nd last field and keep the last field as it is. And if last field value does not have 9999-12-31 then subtract 3 days from both last and second last field.
Sample Input file:-
#Code|sequence|Code_ID|margin_ID|Eff_date|End_date
M|X19|0002|F3|2020-12-10|9999-12-31
M|X19|0002|F3|2021-01-15|9999-12-31
M|X19|0002|F3|2010-10-10|2019-11-09
M|X19|0002|F3|2010-03-09|2011-04-02
Expected Output file:-
#Code|sequence|Code_ID|margin_ID|Eff_date|End_date
M|X19|0002|F3|2020-12-07|9999-12-31
M|X19|0002|F3|2021-01-12|9999-12-31
M|X19|0002|F3|2010-10-07|2019-11-06
M|X19|0002|F3|2010-03-06|2011-03-30
Can anyone suggest with script or command to get the expected output?
I am trying with the following command, but getting error.
for i in `ls *.csv`
do
awk -F"|" -v OFS="|" '{if(NR>1) { if ($NF~/9999-12-31/) { $(NF-1)="date -d \"$(date -d \""$(NF-1)"\")-3days\" \"+%Y-%m-%d \""; print $0}
else {$(NF-1)="date -d \"$(date -d \""$(NF-1)"\")-3days\" \"+%Y-%m-%d \""; $NF="date -d \"$(date -d \""$NF"\")-3days\" \"+%Y-%m-%d \"";
print $0} } }' $i >> temp/new_$i
done
Getting awk: syntax error near line 1
EDIT by Ed Morton I ran the above awk script through gawk -o-
(pretty-print) to make it legible and here is the vastly clearer result:
{
if (NR > 1) {
if ($NF ~ /9999-12-31/) {
$(NF - 1) = "date -d \"$(date -d \"" $(NF - 1) "\")-3days\" \"+%Y-%m-%d \""
print $0
} else {
$(NF - 1) = "date -d \"$(date -d \"" $(NF - 1) "\")-3days\" \"+%Y-%m-%d \""
$NF = "date -d \"$(date -d \"" $NF "\")-3days\" \"+%Y-%m-%d \""
print $0
}
}
}
Upvotes: 0
Views: 94
Reputation: 203368
Using GNU awk for date functions (will be orders of magnitude faster than any code calling the Unix date
command) and gensub():
$ cat tst.awk
BEGIN {
FS=OFS="|"
deltaSecs = -3*24*60*60
}
NR > 1 {
$(NF-1) = applyDelta($(NF-1),deltaSecs)
if ( $NF != "9999-12-31" ) {
$NF = applyDelta($NF,deltaSecs)
}
}
{ print }
function applyDelta(date,delta) { return secs2date( date2secs(date) + delta ) }
function date2secs(date) { return mktime(gensub(/-/," ","g",date) " 12 0 0") }
function secs2date(secs) { return strftime("%Y-%m-%d",secs) }
$ awk -f tst.awk file
#Code|sequence|Code_ID|margin_ID|Eff_date|End_date
M|X19|0002|F3|2020-12-07|9999-12-31
M|X19|0002|F3|2021-01-12|9999-12-31
M|X19|0002|F3|2010-10-07|2019-11-06
M|X19|0002|F3|2010-03-06|2011-03-30
If you don't have GNU awk but somehow do have GNU date then just change the definition of the final 2 functions above to:
function date2secs(date, cmd, line, secs) {
cmd = "date +\"%s\" --date=\"" date " 12:00:00\""
secs = ( (cmd | getline line) > 0 ? line : -1 )
close(cmd)
return secs
}
function secs2date(secs, cmd, line, date) {
cmd = "date +\"%Y-%m-%d\" --date=\"@" secs "\""
date = ( (cmd | getline line) > 0 ? line : -1 )
close(cmd)
return date
}
but as mentioned previously it will run much slower than the gawk version as it's spawning a subshell every time it calls date
.
Upvotes: 1
Reputation: 133458
EDIT: With function approach in awk
try following. Written and tested with shown samples only in GNU awk
. I have run this on single file we can also pass csv files into this program without using bash loop.
awk '
BEGIN{
threedaysSecs=(3 * 24 * 60 * 60)
FS=OFS="|"
}
function getDate(field){
value=(mktime(substr($field,1,4)" "substr($field,6,2)" "substr($field,9,2) " 00 00 00"))-threedaysSecs
$field=strftime("%Y-%m-%d", value)
}
$NF=="9999-12-31"{
getDate((NF-1))
}
$NF!="9999-12-31"{
getDate((NF-1))
getDate(NF)
}
1
' Input_file
Could you please try following. Without function approach.
awk '
BEGIN{
threedaysSecs=(3 * 24 * 60 * 60)
FS=OFS="|"
}
$NF=="9999-12-31"{
value=(mktime(substr($5,1,4)" "substr($5,6,2)" "substr($5,9,2) " 00 00 00"))-threedaysSecs
$5=strftime("%Y-%m-%d", value)
}
$NF!="9999-12-31"{
value1=(mktime(substr($5,1,4)" "substr($5,6,2)" "substr($5,9,2) " 00 00 00"))-threedaysSecs
value2=(mktime(substr($6,1,4)" "substr($6,6,2)" "substr($6,9,2) " 00 00 00"))-threedaysSecs
$(NF-1)=strftime("%Y-%m-%d", value1)
$NF=strftime("%Y-%m-%d", value2)
}
1
' Input_file
Upvotes: 4
Reputation: 10123
With plain bash
and date
from GNU coreutils:
#!/bin/bash
for file in *.csv; do
while IFS='|' read -ra field; do
if [[ ${field[0]} != \#* ]]; then
field[-2]=$(date -d "${field[-2]} - 3 days" +%F)
if [[ ${field[-1]} != '9999-12-31' ]]; then
field[-1]=$(date -d "${field[-1]} - 3 days" +%F)
fi
fi
(IFS='|'; printf '%s\n' "${field[*]}")
done < "$file" > "temp/new_$file"
done
Upvotes: 1
Reputation: 37404
Another awk, using getline
and date
in shell:
$ awk '
BEGIN {
FS=OFS="|"
}
FNR>1{
for(i=5;i<=5+($5=="9999-12-31");i++)
if(("date -d \"" $i " -3days\" +\"%Y-%m-%d\"" | getline res)>0)
$i=res
else
exit 1
}1' file
Upvotes: 1