Mikou
Mikou

Reputation: 99

Parsing using Regular expression

I have a string like

DELIVERY 'AR_ACTDETAIL_UPD' 'AR_DETAIL_UPD'
MODULE TABLE
FILTER 'AR_ACT'
DEFINE UPDN 'UPDATE'
DEFINE REFH 'UPDATE'
DEFINE CIT '5000'
DEFINE TDB 'TARGET'
DEFINE TABLE 'AR_ACTIVITY_DETAIL'
ELEMENTS '"POSTING_PERIOD_SID","POSTING_DATE_SID","ACCTG_DOC_CLSS_SID","CUSTOMER_BLLT_SID","AR_ASSET_ACCNT_SID","CO_CNSLDTN_SID","AR_BSNSS_UNT_SID","LOCAL_CURRENCY_AMT","LCL_CUR_DSCNT_AAMT","LCL_CUR_DSCNT_TAMMT","TAXABLE_AMT","NO_TAXABLE_AMT","NO_FOREIGN_TAXABLE_AMT","TAX_CODE","BEFOREINVOICE_SID","ACCNT_BANK_PAYMENT_SID","GAIN_LOSS_LOC_AMT","LOCAL_OPEN_AMOUNT","FOREIGN_OPEN_AMOUNT"'
ELEDEFINE +3 IFNULL '0'
ELEDEFINE +4 IFNULL '0'
ELEDEFINE +5 IFNULL '0'
ELEDEFINE +6 IFNULL '0'

DELIVERY 'AR_DOC_EXINS' 'AR_DOC_EINS'
MODULE TABLE
FILTER 'AR_DOC_EXTNSN_COLS_INS'
DEFINE UPDATE_DETECTION 'UPDATE'
DEFINE REFRESH 'APPEND'
DEFINE COMMIT '5000'
DEFINE TABLEDB 'TARGET'
DEFINE TABLE 'AR_DOC_EXTNSN_COLS'
ELEMENTS '"AR_ACTVSID","DOCUMENT_CO_CD","TRANSACTION_TYP_CD","TRANSACTION_NO","DOCUMENT_SUFFIX_NO","DOC_SFFX_EXTT_NO","INVOICE_MULT","PAYMENT_MULT","ADJUSTMENT_MULT","BAD_DEBT_MULT","SOURCE_INFO","CHG_INVC_PY_STTS_IND","CHG_GROUP_CURRENCY_AMT","CHG_GRP_CUR_DSCNT_AAMT","CHG_GRP_CUR_DSCNT_TAMT","CREATED_DT","CHANGED_DT","ACCOUNT_ID","FI_ACCOUNT_ID"'
ELEDEFINE +2 IFNULL '{$U_DEFAULT_SHORT_CHAR}'
ELEDEFINE +3 IFNULL '{$U_DEFAULT_SHORT_CHAR}'

DELIVERY 'AR_DOC_EXS_UPD' 'AR_DOC__UPD'
MODULE TABLE
FILTER 'AR_DOC_EXTNSN_COLS_UPD'
DEFINE UPDATE_DETECTION 'UPDATE'
DEFINE REFRESH 'UPDATE'
DEFINE COMMIT '5000'
DEFINE TABLEDB 'TARGET'
DEFINE TABLE 'AR_DOC_EOLS'
ELEMENTS '"DOCUMENT_CO_CD","TRANSACTION_TYP_CD","TRANSACTION_NO","DOCUMENT_SUFFIX_NO","DOC_SFFX_EXTNSN_NO","PAYMENT_ID","PAYMETED_DT","CHANGED_DT","AR_ACTVTY_DOC_SID_TMP","ACCOUNT_ID","FI_ACCOUNT_ID","O_GROUP_CURRENCY_AMT","O_GRP_CUR_DSCNT_TAMT","O_GRP_CUR_DSCNT_AAMT"'
ELEDEFINE +1 IFNULL '{$U_DEFAULT_SHORT_CHAR}'
ELEDEFINE +2 IFNULL '{$U_DEFAULT_SHORT_CHAR}'
ELEDEFINE +3 IFNULL '{$U_DEFAULT_NUMBER}'

ALLOW DUPLICATES

The number of deliveries is unknown, but if I find ALLOW, REJECT, MERGE, or AGGREGATE, that means there are no more deliveries. My idea was to extract each DELIVERY's source code separately (that is, the substring between two DELIVERY keywords, or between a DELIVERY and one of the words ALLOW, REJECT, MERGE, AGGREGATE), so I tried this pattern:

// NOTE(review): the [...] part is a character class, not an alternation group: it matches at most
// ONE character from the listed set, and the trailing ? makes even that optional. Because the
// preceding .*? is lazy and may match nothing, the match never extends to the next keyword.
Pattern p = Pattern.compile("DELIVERY\\s.*?[\\nDELIVERY|\\nALLOW|\\nREJECT|\\nMERGE|\\nAGGREGATE]?",Pattern.DOTALL);

but it doesn't work for me.

Upvotes: 1

Views: 192

Answers (2)

Ro Yo Mi
Ro Yo Mi

Reputation: 14990

Description

This regex will capture each of the three deliveries in your sample text

^delivery(?:'[^']*'|.)*?^(?=delivery|aggregate|reject|allow|merge)

enter image description here

Example

DELIVERY 'AR_ACTDETAIL_UPD' 'AR_DETAIL_UPD'
MODULE TABLE
FILTER 'AR_ACT'
DEFINE UPDN 'UPDATE'
DEFINE REFH 'UPDATE'
DEFINE CIT '5000'
DEFINE TDB 'TARGET'
DEFINE TABLE 'AR_ACTIVITY_DETAIL'
ELEMENTS '"POSTING_PERIOD_SID","POSTING_DATE_SID","ACCTG_DOC_CLSS_SID","CUSTOMER_BLLT_SID","AR_ASSET_ACCNT_SID","CO_CNSLDTN_SID","AR_BSNSS_UNT_SID","LOCAL_CURRENCY_AMT","LCL_CUR_DSCNT_AAMT","LCL_CUR_DSCNT_TAMMT","TAXABLE_AMT","NO_TAXABLE_AMT","NO_FOREIGN_TAXABLE_AMT","TAX_CODE","BEFOREINVOICE_SID","ACCNT_BANK_PAYMENT_SID","GAIN_LOSS_LOC_AMT","LOCAL_OPEN_AMOUNT","FOREIGN_OPEN_AMOUNT"'
ELEDEFINE +3 IFNULL '0'
ELEDEFINE +4 IFNULL '0'
ELEDEFINE +5 IFNULL '0'
ELEDEFINE +6 IFNULL '0'

DELIVERY 'AR_DOC_EXINS' 'AR_DOC_EINS'
MODULE TABLE
FILTER 'AR_DOC_EXTNSN_COLS_INS'
DEFINE UPDATE_DETECTION 'UPDATE'
DEFINE REFRESH 'APPEND'
DEFINE COMMIT '5000'
DEFINE TABLEDB 'TARGET'
DEFINE TABLE 'AR_DOC_EXTNSN_COLS'
ELEMENTS '"AR_ACTVSID","DOCUMENT_CO_CD","TRANSACTION_TYP_CD","TRANSACTION_NO","DOCUMENT_SUFFIX_NO","DOC_SFFX_EXTT_NO","INVOICE_MULT","PAYMENT_MULT","ADJUSTMENT_MULT","BAD_DEBT_MULT","SOURCE_INFO","CHG_INVC_PY_STTS_IND","CHG_GROUP_CURRENCY_AMT","CHG_GRP_CUR_DSCNT_AAMT","CHG_GRP_CUR_DSCNT_TAMT","CREATED_DT","CHANGED_DT","ACCOUNT_ID","FI_ACCOUNT_ID"'
ELEDEFINE +2 IFNULL '{$U_DEFAULT_SHORT_CHAR}'
ELEDEFINE +3 IFNULL '{$U_DEFAULT_SHORT_CHAR}'

DELIVERY 'AR_DOC_EXS_UPD' 'AR_DOC__UPD'
MODULE TABLE
FILTER 'AR_DOC_EXTNSN_COLS_UPD'
DEFINE UPDATE_DETECTION 'UPDATE'
DEFINE REFRESH 'UPDATE'
DEFINE COMMIT '5000'
DEFINE TABLEDB 'TARGET'
DEFINE TABLE 'AR_DOC_EOLS'
ELEMENTS '"DOCUMENT_CO_CD","TRANSACTION_TYP_CD","TRANSACTION_NO","DOCUMENT_SUFFIX_NO","DOC_SFFX_EXTNSN_NO","PAYMENT_ID","PAYMETED_DT","CHANGED_DT","AR_ACTVTY_DOC_SID_TMP","ACCOUNT_ID","FI_ACCOUNT_ID","O_GROUP_CURRENCY_AMT","O_GRP_CUR_DSCNT_TAMT","O_GRP_CUR_DSCNT_AAMT"'
ELEDEFINE +1 IFNULL '{$U_DEFAULT_SHORT_CHAR}'
ELEDEFINE +2 IFNULL '{$U_DEFAULT_SHORT_CHAR}'
ELEDEFINE +3 IFNULL '{$U_DEFAULT_NUMBER}'

ALLOW DUPLICATES

Code

import java.util.regex.Pattern;
import java.util.regex.Matcher;
/**
 * Demo driver: applies the delivery-block regex to a sample string and prints every
 * group of every match in the form {@code [matchIndex][groupIndex] = text}.
 */
class Module1{
  public static void main(String[] asd){
    // Placeholder input; substitute the real text to be scanned.
    final String input = "source string to match with pattern";
    final int flags = Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL;
    final Matcher matcher = Pattern.compile("^delivery(?:'[^']*'|.)*?^(?=delivery|aggregate|reject|allow|merge)", flags).matcher(input);

    for (int matchNo = 0; matcher.find(); matchNo++) {
      // Group 0 is the whole match; groups 1..groupCount() are the capturing groups.
      final int lastGroup = matcher.groupCount();
      for (int g = 0; g <= lastGroup; g++) {
        System.out.println("[" + matchNo + "][" + g + "] = " + matcher.group(g));
      }
    }
  }
}

Matches

[0][0] = DELIVERY 'AR_ACTDETAIL_UPD' 'AR_DETAIL_UPD'
MODULE TABLE
FILTER 'AR_ACT'
DEFINE UPDN 'UPDATE'
DEFINE REFH 'UPDATE'
DEFINE CIT '5000'
DEFINE TDB 'TARGET'
DEFINE TABLE 'AR_ACTIVITY_DETAIL'
ELEMENTS '"POSTING_PERIOD_SID","POSTING_DATE_SID","ACCTG_DOC_CLSS_SID","CUSTOMER_BLLT_SID","AR_ASSET_ACCNT_SID","CO_CNSLDTN_SID","AR_BSNSS_UNT_SID","LOCAL_CURRENCY_AMT","LCL_CUR_DSCNT_AAMT","LCL_CUR_DSCNT_TAMMT","TAXABLE_AMT","NO_TAXABLE_AMT","NO_FOREIGN_TAXABLE_AMT","TAX_CODE","BEFOREINVOICE_SID","ACCNT_BANK_PAYMENT_SID","GAIN_LOSS_LOC_AMT","LOCAL_OPEN_AMOUNT","FOREIGN_OPEN_AMOUNT"'
ELEDEFINE +3 IFNULL '0'
ELEDEFINE +4 IFNULL '0'
ELEDEFINE +5 IFNULL '0'
ELEDEFINE +6 IFNULL '0'


[1][0] = DELIVERY 'AR_DOC_EXINS' 'AR_DOC_EINS'
MODULE TABLE
FILTER 'AR_DOC_EXTNSN_COLS_INS'
DEFINE UPDATE_DETECTION 'UPDATE'
DEFINE REFRESH 'APPEND'
DEFINE COMMIT '5000'
DEFINE TABLEDB 'TARGET'
DEFINE TABLE 'AR_DOC_EXTNSN_COLS'
ELEMENTS '"AR_ACTVSID","DOCUMENT_CO_CD","TRANSACTION_TYP_CD","TRANSACTION_NO","DOCUMENT_SUFFIX_NO","DOC_SFFX_EXTT_NO","INVOICE_MULT","PAYMENT_MULT","ADJUSTMENT_MULT","BAD_DEBT_MULT","SOURCE_INFO","CHG_INVC_PY_STTS_IND","CHG_GROUP_CURRENCY_AMT","CHG_GRP_CUR_DSCNT_AAMT","CHG_GRP_CUR_DSCNT_TAMT","CREATED_DT","CHANGED_DT","ACCOUNT_ID","FI_ACCOUNT_ID"'
ELEDEFINE +2 IFNULL '{$U_DEFAULT_SHORT_CHAR}'
ELEDEFINE +3 IFNULL '{$U_DEFAULT_SHORT_CHAR}'


[2][0] = DELIVERY 'AR_DOC_EXS_UPD' 'AR_DOC__UPD'
MODULE TABLE
FILTER 'AR_DOC_EXTNSN_COLS_UPD'
DEFINE UPDATE_DETECTION 'UPDATE'
DEFINE REFRESH 'UPDATE'
DEFINE COMMIT '5000'
DEFINE TABLEDB 'TARGET'
DEFINE TABLE 'AR_DOC_EOLS'
ELEMENTS '"DOCUMENT_CO_CD","TRANSACTION_TYP_CD","TRANSACTION_NO","DOCUMENT_SUFFIX_NO","DOC_SFFX_EXTNSN_NO","PAYMENT_ID","PAYMETED_DT","CHANGED_DT","AR_ACTVTY_DOC_SID_TMP","ACCOUNT_ID","FI_ACCOUNT_ID","O_GROUP_CURRENCY_AMT","O_GRP_CUR_DSCNT_TAMT","O_GRP_CUR_DSCNT_AAMT"'
ELEDEFINE +1 IFNULL '{$U_DEFAULT_SHORT_CHAR}'
ELEDEFINE +2 IFNULL '{$U_DEFAULT_SHORT_CHAR}'
ELEDEFINE +3 IFNULL '{$U_DEFAULT_NUMBER}'

Upvotes: 3

anubhava
anubhava

Reputation: 784868

Following code should work for you:

// Lazily consume from a DELIVERY keyword up to (but not including) the next terminating keyword.
Pattern p = Pattern.compile("(?s)(\\bDELIVERY\\b.+?(?=\\b(?:DELIVERY|ALLOW|REJECT|MERGE|AGGREGATE)\\b))");
Matcher m = p.matcher(str); // str is your input string
boolean found = m.find();
if (!found) {
    System.out.println("Didn't Match");
} else {
    // The first match is already positioned; print it, then keep advancing.
    do {
        System.out.printf("Deliver: %s%n", m.group(1));
    } while (m.find());
}

Upvotes: 2

Related Questions