Reputation: 451
I have the following awk script:
#! /usr/bin/awk -f
BEGIN{
FS=OFS="\t";
split("Mtm|^Dept|^Im|^TT|ge[mn]$,^Mini", arr1, ",")
split("Variable Expenses,Fixed Expenses", arr2, ",")
itm="Accounting & Legal";
}
$3<0 && $4~arr1[1] {$7=itm; $8=arr2[1]; print $0}
$3<0 && $4~arr1[2] {$7=itm; $8=arr2[2]; print $0}
Which produces the following output:
c_wp_e_2025 2021-05-10 -120.00 8430Ken Jebsen BILL PAYMENT 12:08-11662 Accounting & Legal Variable Expenses
c_wp_e_2148 2021-07-27 -90.85 6450Barrie L. Jorgen BILL PAYMENT 09:31-35651 Accounting & Legal Variable Expenses
e_wp_e_789 2018-06-12 -190.00 Immigration NZ 11 DEBIT Accounting & Legal Variable Expenses
e_wp_e_818 2018-06-29 -153.43 Dept of Home Affa 26 DEBIT AUD 141.37 Accounting & Legal Variable Expenses
e_wp_e_839 2018-07-18 -67.47 TT Services New Z 16 DEBIT Accounting & Legal Variable Expenses
e_wp_e_2512 2021-07-27 -470.20 Dept Internal Aff 23 DEBIT Accounting & Legal Variable Expenses
c_az_262 2021-10-01 -210.45 Mtm Accounting Ltd. Mlm Accounting Accounting & Legal Variable Expenses
c_az_421 2021-07-19 -561.20 Paypal *Mtm Paypal *Mtm Accounting & Legal Variable Expenses
c_az_1082 2020-11-25 -52.20 Ministryofbusiness 285515Dpsa2A Mbie Accounting & Legal Fixed Expenses
c_az_1110 2020-11-16 -2228.70 Mtm Accounting Ltd. Mlm Accounting Accounting & Legal Variable Expenses
This works as desired. I have however many more categories besides itm="Accounting & Legal" (e.g. itm="Bank_charges) and sometimes there are Fixed Expenses and Variable Expenses. At other times one of them only in which case the arrays created in the splits would only have one value. In such a case the second statement in the main body would become unused. For every new category there are different search patterns. I am new to writing awk programs and am stuck on how to approach this. How could this be written efficiently to accommodate the above scenario.
The input (excerpt) would be:
c_wp_e_2025 2021-05-10 -120.00 8430Ken Jebsen BILL PAYMENT 12:08-11662
c_wp_e_2148 2021-07-27 -90.85 6450Barrie L. Jorgen BILL PAYMENT 09:31-35651
e_wp_e_789 2018-06-12 -190.00 Immigration NZ 11 DEBIT
e_wp_e_818 2018-06-29 -153.43 Dept of Home Affa 26 DEBIT AUD 141.37
e_wp_e_839 2018-07-18 -67.47 TT Services New Z 16 DEBIT
e_wp_e_2512 2021-07-27 -470.20 Dept Internal Aff 23 DEBIT
c_az_262 2021-10-01 -210.45 Mtm Accounting Ltd. Mlm Accounting
c_az_421 2021-07-19 -561.20 Paypal *Mtm Paypal *Mtm
c_az_1082 2020-11-25 -52.20 Ministryofbusiness 285515Dpsa2A Mbie
c_az_1110 2020-11-16 -2228.70 Mtm Accounting Ltd. Mlm Accounting
Criteria for Bank_charges e.g. would be:
split("^Cle|Forei|2 WBC|irnie W| \
wtown W|*M|^Repl|elex|^Unar|lert$,enance$|ebit|A/C|rice$", arr1, ",")
split("Variable Expenses,Fixed Expenses", arr2,",")
itm="Bank_charges
Some output from this criteria would result in:
c_az_1668 2020-03-06 -10.00 Visa Debit Card Fee 4825561****** 4823 Bank_charges Fixed Expenses
c_az_1687 2020-02-28 -8.50 Monthly A/C Fee Bank_charges Fixed Expenses
c_az_1688 2020-02-28 -2.50 Clearance Fee Bank_charges Variable Expenses
c_az_1785 2020-01-31 -8.50 Monthly A/C Fee Bank_charges Fixed Expenses
Another category that only has Variable expenses is:
split("^318|^74 |ASB|^City S|^Fix", arr1,",")
split("Variable Expenses", arr2,",")
itm="Bank_withdrawals"
This however does not work at all with the above action. All data from the input file is categorized Bank_withdrawals and classified as Variable expenses.
Upvotes: 0
Views: 73
Reputation: 203129
I think this is what you're trying to do but it's obviously untested since there's isn't a concrete sample of input and expected output in your question to test with:
$ cat tst.sh
#!/usr/bin/env bash
awk '
BEGIN{
FS=OFS="\t"
mkMap("Mtm|^Dept|^Im|^TT|ge[mn]$", "Variable Expenses", "Accounting & Legal")
mkMap("^Mini", "Fixed Expenses", "Accounting & Legal")
mkMap("^Cle|Forei|2 WBC|irnie W| wtown W|[*]M|^Repl|elex|^Unar|lert$", "Variable Expenses", "Bank_charges")
mkMap("enance$|ebit|A/C|rice$", "Fixed Expenses", "Bank_charges")
mkMap("^318|^74 |ASB|^City S|^Fix", "Variable Expenses", "Bank_withdrawals")
}
$3 < 0 {
found = 0
for ( re in re2type ) {
if ( $4 ~ re ) {
$7 = re2item[re]
$8 = re2type[re]
found = 1
}
}
if ( found ) {
print
}
}
function mkMap(re,type,item) {
re2type[re] = type
re2item[re] = item
}
' "${@:--}"
Upvotes: 1
Reputation: 22002
Would you please try the following:
#!/usr/bin/awk -f
BEGIN {
FS=OFS="\t";
# enumerate the item, keyword and the statement in order
itm = "Accounting & Legal"
key[itm] = "Mtm|^Dept|^Im|^TT|ge[mn]$,^Mini"
stmnt[itm] = "Variable Expenses,Fixed Expenses"
itm = "Bank_charges"
key[itm] = "^Cle|Forei|2 WBC|irnie W|wtown W|*M|^Repl|elex|^Unar|lert$,enance$|ebit|A/C|rice$"
stmnt[itm] = "Variable Expenses,Fixed Expenses"
itm="Bank_withdrawals"
key[itm] = "^318|^74 |ASB|^City S|^Fix"
stmnt[itm] = "Variable Expenses"
# initialize key2 and stmnt2 using itm, key and stmnt defined above
for (itm in key) {
split(key[itm], a, ",")
key2[itm,1] = a[1]
key2[itm,2] = a[2]
split(stmnt[itm], a, ",")
stmnt2[itm,1] = a[1]
stmnt2[itm,2] = a[2]
}
}
# main loop with the input lines
{
for (itm in key) {
for (i = 1; i <= 2; i++) {
if (key2[itm,i] != "") {
if ($3 < 0 && $4 ~ key2[itm,i]) {
$7 = itm; $8 = stmnt2[itm,i]
print $0
}
}
}
}
}
If you have more items, append them with the keys and statements below the "Bank_charges" lines referring to the existing codes. If an item has either statement "Variable Expenses" or "Fixed Expenses", just put one element without a comma.
Upvotes: 1
Reputation: 1286
If you're new to awk, it might be best to look at writing this long hand:
#! /usr/bin/awk -f
BEGIN{ FS=OFS="\t"; }
$3<0 && $4~"Mtm|^Dept|^Im|^TT|ge[mn]$" {$7=;"Accounting & Legal"; $8="Variable Expenses"; print $0}
$3<0 && $4~"^Mini" {$7="Accounting & Legal"; $8="Fixed Expenses"; print $0}
(Please excuse typos, writing on my phone)
However I don't believe this is what you're actually asking for, so I'd suggest looping through the elements of the arrays that have been created in the begin like so:
#! /usr/bin/awk -f
BEGIN{
FS=OFS="\t";
els=split("Mtm|^Dept|^Im|^TT|ge[mn]$,^Mini", arr1, ",");
split("Variable Expenses,Fixed Expenses", arr2, ",");
split("Accounting & Legal","Accounting & Legal", arr3, ",");
}
for (i=1;i<=els;i++){
$3<0 && $4~arr1[i] {$7=arr3[i]; $8=arr2[i]; print $0}
}
Upvotes: 0