Reputation: 19
I have a script that is suppose to run autorep -j <Job Name> -q
and fetch the output of the previous command and filter it out to create a CSV file
.
The Script:
#!/bin/bash
read -p "Enter /DIR/PATH/FILENAME where you wish to copy the data: " FILENAME
echo "Enter the JOB_NAME or %SEARCHSTRING%"
while read -r i;
do
awk '
BEGIN {
print "\"insert_job\",\"job_type\",\"command\",\"machine\",\"owner\",\"date_conditions\",\"condition\",\"run_calendar\",\"exclude_calendar\",\"days_of_week\",\"run_window\",\"start_times\",\"start_mins\",\"profile\",\"term_run_time\",\"watch_file\",\"watch_interval\""
}
/^insert_job:/ { jn="\""$2"\""; jt="\""$4"\""; cmd="\" \""; mcn="\" \""; own="\" \""; dc="\" \""; c="\" \""; rc="\" \""; ec="\" \""; dow="\" \""; ruw="\" \""; st="\" \""; sm="\" \""; prof="\" \""; trt="\" \""; wf="\" \""; wi="\" \"" }
/^command:/ {cmd="\""$ "\""}
/^machine:/ {mcn="\""$2"\""}
/^owner:/ {own="\""$2"\""}
/^date_conditions:/ {dc="\""$2"\""}
/^condition:/ {c="\""$2"\""}
/^run_calendar:/ {rc="\""$2"\""}
/^exclude_calendar:/ {ec="\""$2"\""}
/^days_of_week:/ {dow="\""$2"\""}
/^run_window:/ {ruw="\""$2"\""}
/^start_times:/ {gsub("\"",""); st="\""$2"\""}
/^start_mins:/ {sm="\""$2"\""}
/^profile:/ {prof="\""$2"\""}
/^term_run_time:/ {trt="\""$2"\""}
/^watch_file:/ {wf="\""$2"\""}
/^watch_interval:/ {wi="\""$2"\""}
/_if_terminated/{printf "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n", jn, jt, cmd, mcn, own, dc, c, rc, ec, dow, ruw, st, sm, prof, trt, wf, wi}
' < <(autorep -j $i -q) > $FILENAME.csv
break
done
Depending of the type of job, the output of autorep
command can be little different
Output of Autorep -j <Job Name> -q
JOB_A
insert_job: JOB_A job_type: FW
machine: machine.server.com
owner: User
permission:
date_conditions: 0
term_run_time: 3
alarm_if_fail: 1
profile: "/tmp/.profile"
alarm_if_terminated: 1
watch_file: "/tmp/Control*"
watch_interval: 60
JOB_B
insert_job: JOB_B job_type: CMD
command: Autorep -M ALL
machine: machine.server.com
owner: User
permission:
date_conditions: 1
days_of_week: mo,tu,we,th,fr
start_mins: 9,19,29,39,49,59
run_window: "06:00-19:00"
std_out_file: "/tmp/JOB_B.out.txt"
std_err_file: "/tmp/JOB_B.err.txt"
alarm_if_fail: 1
alarm_if_terminated: 1
If i run the above script to get the output in a CSV format then i get the below output:
Script run for JOB_A
"insert_job","job_type","command","machine","owner","date_conditions","condition","run_calendar","exclude_calendar","days_of_week","run_window","start_times","start_mins","profile","term_run_time","watch_file","watch_interval"
"JOB_A","FW"," ","machine.server.com","User","0"," "," "," "," "," "," "," ",""/tmp/.profile"","3"," "," "
Script run got JOB_B
"insert_job","job_type","command","machine","owner","date_conditions","condition","run_calendar","exclude_calendar","days_of_week","run_window","start_times","start_mins","term_run_time","watch_file","watch_interval"
"JOB_B","CMD","command: Autorep -M ALL,"machine.server.com","User","1"," "," "," ","mo,tu,we,th,fr",""06:00-19:00""," ","9,19,29,39,49,59"," "," "," "," "
For JOB_A few of the fields are not even captured and for JOB_B Command and Machine come as one entity
Expected Output:
JOB_A
"insert_job","job_type","command","machine","owner","date_conditions","condition","run_calendar","exclude_calendar","days_of_week","run_window","start_times","start_mins","profile","term_run_time","watch_file","watch_interval"
"JOB_A","FW"," ","machine.server.com","User","0"," "," "," "," "," "," "," ",""/tmp/.profile"","3",""/tmp/Control*"","60"
JOB_B
"insert_job","job_type","command","machine","owner","date_conditions","condition","run_calendar","exclude_calendar","days_of_week","run_window","start_times","start_mins","term_run_time","watch_file","watch_interval"
"JOB_B","CMD","Autorep -M ALL","machine.server.com","User","1"," "," "," ","mo,tu,we,th,fr",""06:00-19:00""," ","9,19,29,39,49,59"," "," "," "," "
Upvotes: 1
Views: 129
Reputation: 16905
If you choose FS
properly then you can process the input smoothly:
awk -F ': | {2,}' -v OFS=',' '
BEGIN {
nf = split("insert_job,job_type,command,machine,owner,date_conditions,condition,run_calendar,exclude_calendar,days_of_week,run_window,start_times,start_mins,profile,term_run_time,watch_file,watch_interval",header,OFS)
for (i = 1; i <= nf; i++)
printf "%s%s", csvescape(header[i]), (i < nf ? OFS : ORS)
}
{
for ( i = 1; i < NF; i += 2 )
record[$i] = $(i+1)
}
END {
for ( i = 1; i <= nf; i++ )
printf "%s%s", csvescape(record[header[i]]), (i < nf ? OFS : ORS)
}
function csvescape(str) {
gsub(/"/,"\"\"",str)
return "\"" str "\""
}
'
"insert_job","job_type","command","machine","owner","date_conditions","condition","run_calendar","exclude_calendar","days_of_week","run_window","start_times","start_mins","profile","term_run_time","watch_file","watch_interval"
"JOB_A","FW","","machine.server.com","User","0","","","","","","","","""/tmp/.profile""","3","""/tmp/Control*""","60"
"insert_job","job_type","command","machine","owner","date_conditions","condition","run_calendar","exclude_calendar","days_of_week","run_window","start_times","start_mins","profile","term_run_time","watch_file","watch_interval"
"JOB_B","CMD","Autorep -M ALL","machine.server.com","User","1","","","","mo,tu,we,th,fr","""06:00-19:00""","","9,19,29,39,49,59","","","",""
remarks:
" "
for the empty fields then you can add the code for it in the csvescape
function.Upvotes: 1
Reputation: 34184
Addressing just OP's current code ... there are two main issues:
/^command:/ {cmd="\""$ "\""}
- does not reference a specific field so $
is treated the same as $0
(the whole line), hence the reason the output for JOB B / JOB_B
shows the whole line, ie, command: Autorep -M ALL
; one solution would be to strip off the first field (plus the field delimiter) before referencing $0
/if_terminated/ { printf ...}
- is coded with an assumption the line containing if_terminated
is the last line in the file, but for JOB A / JOB_A
said line is not the last line so the printf
is being run before all input lines have been processed; one solution would be to delay the printf
until after the entire file has been read (eg, move to an END{}
block)Rolling these changes into OP's current awk
code, and adding some formatting to make easier to read:
awk '
BEGIN { print "\"insert_job\",\"job_type\",\"command\",\"machine\",\"owner\",\"date_conditions\",\"condition\",\"run_calendar\",\"exclude_calendar\",\"days_of_week\",\"run_window\",\"start_times\",\"start_mins\",\"profile\",\"term_run_time\",\"watch_file\",\"watch_interval\"" }
/^insert_job:/ { jn="\""$2"\""; jt="\""$4"\""; cmd="\" \""; mcn="\" \""; own="\" \""; dc="\" \""; c="\" \""; rc="\" \""; ec="\" \""; dow="\" \""; ruw="\" \""; st="\" \""; sm="\" \""; prof="\" \""; trt="\" \""; wf="\" \""; wi="\" \"" }
/^command:/ { $0=substr($0,index($0,$2));
cmd="\""$0"\"" }
/^machine:/ { mcn="\""$2"\"" }
/^owner:/ { own="\""$2"\"" }
/^date_conditions:/ { dc="\""$2"\"" }
/^condition:/ { c="\""$2"\"" }
/^run_calendar:/ { rc="\""$2"\"" }
/^exclude_calendar:/ { ec="\""$2"\"" }
/^days_of_week:/ { dow="\""$2"\"" }
/^run_window:/ { ruw="\""$2"\"" }
/^start_times:/ { gsub("\"","");
st="\""$2"\"" }
/^start_mins:/ { sm="\""$2"\"" }
/^profile:/ { prof="\""$2"\"" }
/^term_run_time:/ { trt="\""$2"\"" }
/^watch_file:/ { wf="\""$2"\"" }
/^watch_interval:/ { wi="\""$2"\"" }
END { printf "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n",
jn, jt, cmd, mcn, own, dc, c, rc, ec, dow, ruw, st, sm, prof, trt, wf, wi
}
'
Running this against the 2 input files generates:
"insert_job","job_type","command","machine","owner","date_conditions","condition","run_calendar","exclude_calendar","days_of_week","run_window","start_times","start_mins","profile","term_run_time","watch_file","watch_interval"
"JOB_A","FW"," ","machine.server.com","User","0"," "," "," "," "," "," "," ",""/tmp/.profile"","3",""/tmp/Control*"","60"
"insert_job","job_type","command","machine","owner","date_conditions","condition","run_calendar","exclude_calendar","days_of_week","run_window","start_times","start_mins","profile","term_run_time","watch_file","watch_interval"
"JOB_B","CMD","Autorep -M ALL","machine.server.com","User","1"," "," "," ","mo,tu,we,th,fr",""06:00-19:00""," ","9,19,29,39,49,59"," "," "," "," "
A third issue (ok, more of a nag) with the current code, which doesn't actually affect the output, is the unnecessary use of while/break
; one alternative:
read -r i
awk '....' < <(autorep -j $i -q) > $FILENAME.csv
Upvotes: 1
Reputation: 246774
How about this:
sed -E '1s/[[:blank:]]+(job_type:)/\n\1/' "$file" \
| gawk '
BEGIN {
header = "insert_job,job_type,command,machine,owner,date_conditions,condition,run_calendar,exclude_calendar,days_of_week,run_window,start_times,start_mins,profile,term_run_time,watch_file,watch_interval"
n = split(header, h, /,/)
}
match($0, /([[:alnum:]_]+): (.*)/, m) {
field[m[1]] = gensub(/"/, "&&", "g", m[2])
}
END {
for (i=1; i<=n; i++)
printf "\"%s\"%s", h[i], (i == n ? "\n" : ",")
for (i=1; i<=n; i++)
printf "\"%s\"%s", field[h[i]], (i == n ? "\n" : ",")
}
'
Outputs: JOB_A
"insert_job","job_type","command","machine","owner","date_conditions","condition","run_calendar","exclude_calendar","days_of_week","run_window","start_times","start_mins","profile","term_run_time","watch_file","watch_interval"
"JOB_A","FW","","machine.server.com","User","0","","","","","","","","""/tmp/.profile""","3","""/tmp/Control*""","60"
JOB_B
"insert_job","job_type","command","machine","owner","date_conditions","condition","run_calendar","exclude_calendar","days_of_week","run_window","start_times","start_mins","profile","term_run_time","watch_file","watch_interval"
"JOB_B","CMD","Autorep -M ALL","machine.server.com","User","1","","","","mo,tu,we,th,fr","""06:00-19:00""","","9,19,29,39,49,59","","","",""
This uses sed
to split the 2 name:value pairs on line 1 into 2 separate lines. Then, every line can be parsed the same way.
I'm using GNU awk specifically: for the 3-argument match() function and the gensub() function.
One thing you need to be aware of with CVS: embedded quotes need to be doubled. To put He said "hello."
as a CSV field would look like
"sentence"
"He said ""hello."""
That's what I'm doing with the gensub
function.
I assume you'll want to put multiple jobs in the same CSV output. You can do this:
to_csv() {
sed -E '1s/[[:blank:]]+(job_type:)/\n\1/' "$1" \
| gawk '
BEGIN {
header = "insert_job,job_type,command,machine,owner,date_conditions,condition,run_calendar,exclude_calendar,days_of_week,run_window,start_times,start_mins,profile,term_run_time,watch_file,watch_interval"
n = split(header, h, /,/)
for (i=1; i<=n; i++)
printf "\"%s\"%s", h[i], (i == n ? "\n" : ",")
}
BEGINFILE {delete field}
match($0, /([[:alnum:]_]+): (.*)/, m) {
field[m[1]] = gensub(/"/, "&&", "g", m[2])
}
ENDFILE {
for (i=1; i<=n; i++)
printf "\"%s\"%s", field[h[i]], (i == n ? "\n" : ",")
}
'
}
for file in JOB_*; do
to_csv "$file"
done \
| awk 'NR == 1 || NR % 2 == 0'
which outputs
"insert_job","job_type","command","machine","owner","date_conditions","condition","run_calendar","exclude_calendar","days_of_week","run_window","start_times","start_mins","profile","term_run_time","watch_file","watch_interval"
"JOB_A","FW","","machine.server.com","User","0","","","","","","","","""/tmp/.profile""","3","""/tmp/Control*""","60"
"JOB_B","CMD","Autorep -M ALL","machine.server.com","User","1","","","","mo,tu,we,th,fr","""06:00-19:00""","","9,19,29,39,49,59","","","",""
Upvotes: 0
Reputation: 1707
I see some of the field names that are not coded for recognition and capture. You should report those if encountered for later review and consideration.
It is bad practice to code the end-of-line semi-colon ";" up against the last variable or character on a line, because it could be interpreted as part of that adjacent string.
I also see the line for command
/^command:/ {cmd="\""$ "\""}
might have the missing digit "2".
Your logic for "start_times" will not capture the correct value.
It is not necessary to wrap field contents in a CSV with double quotes, unless there are special characters that confuse the field separator identification, i.e. the commas in "start_mins". Unless you know that the process using the CSV files has issues with consecutive commas (i.e. ",,"), you should not have to assign the space character as a value for "empty" fields. Also assigning spaces can be problematic for interpretation by some processes, so you might want to code with a specific value (I suggest the string "NULL" for visibility) or leave the value empty, i.e. 2 consecutive commas.
Lastly, if any of the fields contain your delimiter character (i.e. ",") then that could create confusion for some interpreters. By definition, some might completely ignore the quotes ... and that would be problematic for parsing content like "start_mins". In those cases, you need to allow for character substitution to eliminate the conflict. In the code below, I have defined the vertical bar "|" as the alternate character to be used if a conflict is detected between the intended delimiter and the content being parsed.
#!/bin/bash
DBG=0
NOVAL="BL"
DQ=0
SPLITTER="|"
while [ $# -gt 0 ]
do
case $1 in
--null )
NOVAL="NL" ; shift
;;
--space )
NOVAL="SP" ; shift
;;
--empty )
NOVAL="BL" ; shift
;;
--quotes )
DQ=1 ; shift
;;
--splitter )
SPLITTER="$2" ; shift ; shift
;;
* )
echo "\n\t Invalid parameter '$1' used on command line. Only valid: [ --null | --space | --empty | --quotes ]\n Bye!\n" ; exit 1
;;
esac
done
### Questions: https://stackoverflow.com/questions/74867891/csv-formating-with-awk
cat >JOB_A <<"EnDoFiNpUt"
insert_job: JOB_A job_type: FW
machine: machine.server.com
owner: User
permission:
date_conditions: 0
term_run_time: 3
alarm_if_fail: 1
profile: "/tmp/.profile"
alarm_if_terminated: 1
watch_file: "/tmp/Control*"
watch_interval: 60
EnDoFiNpUt
cat >JOB_B <<"EnDoFiNpUt"
insert_job: JOB_B job_type: CMD
command: Autorep -M ALL
machine: machine.server.com
owner: User
permission:
date_conditions: 1
days_of_week: mo,tu,we,th,fr
start_mins: 9,19,29,39,49,59
run_window: "06:00-19:00"
std_out_file: "/tmp/JOB_B.out.txt"
std_err_file: "/tmp/JOB_B.err.txt"
alarm_if_fail: 1
alarm_if_terminated: 1
EnDoFiNpUt
if [ ${DBG} -eq 1 ]
then
wc -l JOB_A
wc -l JOB_B
fi
#read -p "Enter /DIR/PATH/FILENAME where you wish to copy the data: " DESTINATION
if [ ${DQ} -eq 1 ]
then
HEADER="\"insert_job\",\"job_type\",\"command\",\"machine\",\"owner\",\"date_conditions\",\"condition\",\"run_calendar\",\"exclude_calendar\",\"days_of_week\",\"run_window\",\"start_times\",\"start_mins\",\"profile\",\"term_run_time\",\"watch_file\",\"watch_interval\""
FORMAT="\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\"\n"
else
HEADER="insert_job,job_type,command,machine,owner,date_conditions,condition,run_calendar,exclude_calendar,days_of_week,run_window,start_times,start_mins,profile,term_run_time,watch_file,watch_interval"
FORMAT="%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n"
fi
for job in JOB_A JOB_B
do
DESTINATION="DUMP__${job}"
echo -e "Processing: ${job} ..."
#read -p "Enter the JOB_NAME or %SEARCHSTRING%" i
{
awk -v dbg="${DBG}" \
-v noval="${NOVAL}" \
-v header="${HEADER}" \
-v format="${FORMAT}" \
-v splitter="${SPLITTER}" '\
BEGIN{
k=0 ;
altSplit=0 ;
switch (noval) {
case "NL" : { noval="NULL" ; break ; } ;
case "SP" : { noval=" " ; break ; } ;
default : { noval="" ; break ; } ;
} ;
}{
k++
if(dbg == 1 ){ print "\n", k, "\nLINE= ", $0 | "cat >&2" ; } ;
pos=index( $0, ":" ) ;
field=substr( $0, 1, pos-1 ) ;
if(dbg == 1 ){ print " >> field= ", field | "cat >&2" ; } ;
tmp=substr( $0, pos+2 ) ;
gsub( "\"", "", tmp) ;
if( index( tmp, "," ) != 0 ){ altSplit=1 ; } ;
if(dbg == 1 ){ print " >> altSplit= ", altSplit | "cat >&2" ; } ;
if(dbg == 1 ){ print " >> tmp= ", tmp | "cat >&2" ; } ;
switch (field) {
case "insert_job" : {
#insert_job: JOB_B job_type: CMD
jn=$2 ;
jt=$4 ;
#Initialize other fields with empty string for placement
cmd=noval ;
mcn=noval ;
own=noval ;
dc=noval ;
c=noval ;
rc=noval ;
ec=noval ;
dow=noval ;
ruw=noval ;
st=noval ;
sm=noval ;
prof=noval ;
trt=noval ;
wf=noval ;
wi=noval ;
} ; break ;
case "command" : { cmd=tmp ; } ; break ;
case "machine" : { mcn=tmp ; } ; break ;
case "owner" : { own=tmp ; } ; break ;
case "date_conditions" : { dc=tmp ; } ; break ;
case "condition" : { c=tmp ; } ; break ;
case "run_calendar" : { rc=tmp ; } ; break ;
case "exclude_calendar" : { ec=tmp ; } ; break ;
case "days_of_week" : { dow=tmp ; } ; break ;
case "run_window" : { ruw=tmp ; } ; break ;
case "start_times" : { st=tmp ; } ; break ;
case "start_mins" : { sm=tmp ; } ; break ;
case "profile" : { prof=tmp ; } ; break ;
case "term_run_time" : { trt=tmp ; } ; break ;
case "watch_file" : { wf=tmp ; } ; break ;
case "watch_interval" : { wi=tmp ; } ; break ;
case "permission" : break ;
default:
if( $0 != "" ){
printf("[WARNING|%s] Ignored line %s for NON-recognized field: %s\n", FILENAME, NR, field ) | "cat >&2" ;
} ; break ;
} ;
}END{
if( altSplit == 1 ){
gsub( ",", splitter , header ) ;
gsub( ",", splitter , format ) ;
} ;
print header ;
printf(format, \
jn, jt, cmd, mcn, own, dc, c, rc, ec, dow, ruw, st, sm, prof, trt, wf, wi ) ;
}' "${job}" > "${DESTINATION}.csv"
ls -l "${DESTINATION}.csv"
} 2>&1 | awk '{ printf("\t%s\n", $0 ) ; }'
echo -e "\nReviewing '${DESTINATION}.csv' ..."
more "${DESTINATION}.csv"
echo ""
done
exit
The session log (with debugging turned off) is as follows:
ericthered:/0__WORK$ ./test_95.sh --empty --quotes
Processing: JOB_A ...
[WARNING|JOB_A] Ignored line 7 for NON-recognized field: alarm_if_fail
[WARNING|JOB_A] Ignored line 9 for NON-recognized field: alarm_if_terminated
-rw-rw-r-- 1 ericthered ericthered 337 Dec 20 20:23 DUMP__JOB_A.csv
Reviewing 'DUMP__JOB_A.csv' ...
"insert_job","job_type","command","machine","owner","date_conditions","condition","run_calendar","exclude_cale
ndar","days_of_week","run_window","start_times","start_mins","profile","term_run_time","watch_file","watch_int
erval"
"JOB_A","FW","","machine.server.com","User","0","","","","","","","","/tmp/.profile","3","/tmp/Control*","60"
Processing: JOB_B ...
[WARNING|JOB_B] Ignored line 10 for NON-recognized field: std_out_file
[WARNING|JOB_B] Ignored line 11 for NON-recognized field: std_err_file
[WARNING|JOB_B] Ignored line 12 for NON-recognized field: alarm_if_fail
[WARNING|JOB_B] Ignored line 13 for NON-recognized field: alarm_if_terminated
-rw-rw-r-- 1 ericthered ericthered 364 Dec 20 20:23 DUMP__JOB_B.csv
Reviewing 'DUMP__JOB_B.csv' ...
"insert_job"|"job_type"|"command"|"machine"|"owner"|"date_conditions"|"condition"|"run_calendar"|"exclude_cale
ndar"|"days_of_week"|"run_window"|"start_times"|"start_mins"|"profile"|"term_run_time"|"watch_file"|"watch_int
erval"
"JOB_B"|"CMD"|"Autorep -M ALL"|"machine.server.com"|"User"|"1"|""|""|""|"mo,tu,we,th,fr"|"06:00-19:00"|""|"9,1
9,29,39,49,59"|""|""|""|""
ericthered:/0__WORK$
As you can see above, the logic triggered the alternate delimiter "|" on detection of the commas in data for JOB_B.
Upvotes: 0