Hisham Hijjawi
Hisham Hijjawi

Reputation: 2425

Isolate Blocks of Text with Specific Regex Python

Suppose I have a file filled with text like so:

module combfn1789(clk, i0, i1, i2, i3, o);
  input clk, i0, i1, i2, i3; 
  output o;
  wire clk, i0, i1, i2, i3;
  wire o;
  wire UNCONNECTED788, n_0, n_1, n_2, n_3, n_4;
  Q_FDP0I0 o_reg(.CK (clk), .D (n_4), .Q (o), .QN (UNCONNECTED788));
  Q_OAI33 g186(.A0 (i2), .A1 (n_1), .A2 (i0), .B0 (n_0), .B1 (n_3), .B2
       (n_2), .Z (n_4));
  Q_INV g187(.A (i3), .Z (n_3));
  Q_INV g188(.A (i0), .Z (n_2));
  Q_INV g189(.A (i1), .Z (n_1));
  Q_INV g190(.A (i2), .Z (n_0));
endmodule;

module combfn1(clk, i0, i1, i2, i3, o);
  input clk, i0, i1, i2, i3;
  output o;
  wire clk, i0, i1, i2, i3;
  wire o;
  wire UNCONNECTED0, n_0, n_1;
  Q_FDP0I0 o_reg(.CK (clk), .D (n_1), .Q (o), .QN (UNCONNECTED0));
  Q_NR04 g59__4296(.A0 (i2), .A1 (i1), .A2 (n_0), .A3 (i3), .Z (n_1));
  Q_INV g60(.A (i0), .Z (n_0));
endmodule

I am only interested in a subset of the text, so I am trying to write a python program to isolate the following:

combfn1789
Q_FDP0I0 o_reg(.CK (clk), .D (n_4), .Q (o), .QN (UNCONNECTED788));
Q_OAI33 g186(.A0 (i2), .A1 (n_1), .A2 (i0), .B0 (n_0), .B1 (n_3), .B2
      (n_2), .Z (n_4));
Q_INV g187(.A (i3), .Z (n_3));
Q_INV g188(.A (i0), .Z (n_2));
Q_INV g189(.A (i1), .Z (n_1));
Q_INV g190(.A (i2), .Z (n_0));

combfn1
Q_NR04 g59__4296(.A0 (i2), .A1 (i1), .A2 (n_0), .A3 (i3), .Z (n_1));
Q_INV g60(.A (i0), .Z (n_0));

My initial thought was to isolate the lines that start with Q_ using re.search. Unfortunately, this doesn't work for isolating the module name (combfn...). I'm not sure how to write a regex that isolates both the lines that start with Q_ and the module name.

Upvotes: 0

Views: 39

Answers (1)

Emma
Emma

Reputation: 27733

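The following expression, or a modified version of it, should return the desired output or something close to it. Note that `\K` is PCRE syntax and is not supported by Python's built-in `re` module, which is why the Python examples below drop it and rely on the capture groups instead: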

module\s+\K([^)(]+)|(Q_[\s\S]*?;)

Test with re.finditer

import re

# Two alternatives, one capture group each:
#   group 1 - the identifier that follows the `module` keyword
#   group 2 - a cell instantiation: from `Q_` up to and including the first `;`
#             (`[\s\S]` lets the statement continue across line breaks)
# The `\b` anchors matter: without them `module` also matches the tail of
# `endmodule` whenever whitespace follows it (i.e. `endmodule` written without
# a trailing semicolon), and `Q_` could match in the middle of an identifier.
regex = r"\bmodule\s+([A-Za-z_][\w$]*)|\b(Q_[\s\S]*?;)"

test_str = ("module combfn1789(clk, i0, i1, i2, i3, o);\n"
    "  input clk, i0, i1, i2, i3; \n"
    "  output o;\n"
    "  wire clk, i0, i1, i2, i3;\n"
    "  wire o;\n"
    "  wire UNCONNECTED788, n_0, n_1, n_2, n_3, n_4;\n"
    "  Q_FDP0I0 o_reg(.CK (clk), .D (n_4), .Q (o), .QN (UNCONNECTED788));\n"
    "  Q_OAI33 g186(.A0 (i2), .A1 (n_1), .A2 (i0), .B0 (n_0), .B1 (n_3), .B2\n"
    "       (n_2), .Z (n_4));\n"
    "  Q_INV g187(.A (i3), .Z (n_3));\n"
    "  Q_INV g188(.A (i0), .Z (n_2));\n"
    "  Q_INV g189(.A (i1), .Z (n_1));\n"
    "  Q_INV g190(.A (i2), .Z (n_0));\n"
    "endmodule;\n\n"
    "module combfn1(clk, i0, i1, i2, i3, o);\n"
    "  input clk, i0, i1, i2, i3;\n"
    "  output o;\n"
    "  wire clk, i0, i1, i2, i3;\n"
    "  wire o;\n"
    "  wire UNCONNECTED0, n_0, n_1;\n"
    "  Q_FDP0I0 o_reg(.CK (clk), .D (n_1), .Q (o), .QN (UNCONNECTED0));\n"
    "  Q_NR04 g59__4296(.A0 (i2), .A1 (i1), .A2 (n_0), .A3 (i3), .Z (n_1));\n"
    "  Q_INV g60(.A (i0), .Z (n_0));\n"
    "endmodule")

# No flags: the pattern has no ^/$ anchors, so re.MULTILINE would change
# nothing, and Verilog is case-sensitive, so re.IGNORECASE would only admit
# false positives such as `q_...` or `MODULE`.
matches = re.finditer(regex, test_str)

for match_num, match in enumerate(matches, start=1):

    print("Match {match_num} was found at {start}-{end}: {match}".format(
        match_num=match_num, start=match.start(), end=match.end(),
        match=match.group()))

    for group_num, group in enumerate(match.groups(), start=1):
        # Only one side of the alternation takes part in any given match;
        # the other group is None (span -1..-1), so there is nothing to show.
        if group is None:
            continue

        print("Group {group_num} found at {start}-{end}: {group}".format(
            group_num=group_num, start=match.start(group_num),
            end=match.end(group_num), group=group))

Test with re.findall

import re

# Two alternatives, one capture group each:
#   group 1 - the identifier that follows the `module` keyword
#   group 2 - a cell instantiation: from `Q_` up to and including the first `;`
#             (`[\s\S]` lets the statement continue across line breaks)
# The `\b` anchors keep `module` from matching the tail of `endmodule` when
# whitespace follows it, and keep `Q_` from matching mid-identifier.
regex = r"\bmodule\s+([A-Za-z_][\w$]*)|\b(Q_[\s\S]*?;)"

test_str = ("module combfn1789(clk, i0, i1, i2, i3, o);\n"
    "  input clk, i0, i1, i2, i3; \n"
    "  output o;\n"
    "  wire clk, i0, i1, i2, i3;\n"
    "  wire o;\n"
    "  wire UNCONNECTED788, n_0, n_1, n_2, n_3, n_4;\n"
    "  Q_FDP0I0 o_reg(.CK (clk), .D (n_4), .Q (o), .QN (UNCONNECTED788));\n"
    "  Q_OAI33 g186(.A0 (i2), .A1 (n_1), .A2 (i0), .B0 (n_0), .B1 (n_3), .B2\n"
    "       (n_2), .Z (n_4));\n"
    "  Q_INV g187(.A (i3), .Z (n_3));\n"
    "  Q_INV g188(.A (i0), .Z (n_2));\n"
    "  Q_INV g189(.A (i1), .Z (n_1));\n"
    "  Q_INV g190(.A (i2), .Z (n_0));\n"
    "endmodule;\n\n"
    "module combfn1(clk, i0, i1, i2, i3, o);\n"
    "  input clk, i0, i1, i2, i3;\n"
    "  output o;\n"
    "  wire clk, i0, i1, i2, i3;\n"
    "  wire o;\n"
    "  wire UNCONNECTED0, n_0, n_1;\n"
    "  Q_FDP0I0 o_reg(.CK (clk), .D (n_1), .Q (o), .QN (UNCONNECTED0));\n"
    "  Q_NR04 g59__4296(.A0 (i2), .A1 (i1), .A2 (n_0), .A3 (i3), .Z (n_1));\n"
    "  Q_INV g60(.A (i0), .Z (n_0));\n"
    "endmodule")

# With two capture groups, findall returns a list of (group 1, group 2)
# tuples; the group that did not take part in a match is the empty string.
print(re.findall(regex, test_str))

DEMO

The expression is explained on the top right panel of this demo, if you wish to explore further or simplify/modify it, and in this link, you can watch how it would match against some sample inputs step by step, if you like.

Upvotes: 1

Related Questions