O'Skywalker
O'Skywalker

Reputation: 701

pyparsing can only find one instance

I want to parse Windows Resource File with pyparsing, because Menu can have deep nesting structure. It would be very difficult to parse such structure with regular expressions.

Everything works properly, but today I found that my code can only find one instance. To make me clear, here is the contents of *.rc file(E:\tool\res\my.rc, to save space only show the error prone part):

#include "../include/resource.h"

IDR_MENU_OPTION MENU BEGIN
    POPUP "Options"
    BEGIN
        MENUITEM "List Layers for &All Pages",  IDM_SHOW_ALL
        MENUITEM "List Layers for &Visible Pages", IDM_SHOW_VISIBLE
        MENUITEM SEPARATOR
        MENUITEM "&Reset to Initial Visibility", IDM_RESET_INIT
        MENUITEM SEPARATOR
        MENUITEM "E&xpand All",                 IDM_EXPAND_ALL
        MENUITEM "C&ollapse All",               IDM_COLLAPSE_ALL
    END
    POPUP ""
    BEGIN
        MENUITEM "List Layers for &All Pages",  IDM_LIST_ALL
        MENUITEM "List Layers for &Visible Pages", IDM_LIST_VISIBLE
        MENUITEM SEPARATOR
        MENUITEM "&Reset to Initial Visibility", IDM_RESET_INIT
        MENUITEM SEPARATOR
        MENUITEM "E&xpand All",                 IDM_EXPAND_ALL
        MENUITEM "C&ollapse All",               IDM_COLLAPSE_ALL
        MENUITEM SEPARATOR
        MENUITEM "Layer &Properties...",        IDM_LAYER_PROPERTIES
    END END

IDR_MENU_PRPPERTIES MENU BEGIN // the menu block is skiped by pyparsing
    POPUP ""
    BEGIN
        MENUITEM "&Show Layers",                IDM_SHOW
        MENUITEM "&Properties...",              IDM_PROPERTIES
    END
    MENUITEM "",                            65535 END

#endif    // not APSTUDIO_INVOKED

my Python code can't find IDR_MENU_PRPPERTIES MENU, the output now is:

IDM_COLLAPSE_ALL
IDM_EXPAND_ALL
IDM_LAYER_PROPERTIES
IDM_LIST_ALL
IDM_LIST_VISIBLE
IDM_RESET_INIT
IDM_SHOW_ALL
IDM_SHOW_VISIBLE
IDR_MENU_OPTION

but the expected output should be:

IDM_COLLAPSE_ALL
IDM_EXPAND_ALL
IDM_LAYER_PROPERTIES
IDM_LIST_ALL
IDM_LIST_VISIBLE
IDM_RESET_INIT
IDM_SHOW_ALL
IDM_SHOW_VISIBLE
IDR_MENU_OPTION
IDR_MENU_PRPPERTIES
IDM_SHOW
IDM_PROPERTIES

and here is my code:

import re
import os
import codecs
import fnmatch
from bs4 import UnicodeDammit
from pyparsing import restOfLine, cStyleComment, Word, alphanums, alphas, \
    Optional, SkipTo, ZeroOrMore, Group, Keyword, quotedString, delimitedList, \
    nums, commaSeparatedList, Forward, Combine


class RcParser:
    def __init__(self, rc_file):
        self.rc_file = rc_file
        handle = open(rc_file, 'rb')
        binary_data = handle.read()
        handle.close()
        dammit = UnicodeDammit(binary_data)
        self.rc_src = dammit.unicode_markup
        self.encoding = dammit.original_encoding
        self.string_table_id = set()
        self.dialog_id = set()
        self.menu_id = set()
        self.img_id = set()

        self.parse(self.rc_src)

    def get_rc_header(self):
        inx = self.rc_file.rfind('\\')
        path = self.rc_file[: inx + 1]
        file_lists = [path + file for file in os.listdir(path) if file.lower().endswith('resource.h')]
        if not file_lists:
            return None
        return file_lists[0]

    def id_by_parsing_rc(self):
        rc_id = self.img_id | self.menu_id | self.dialog_id | self.string_table_id
        return rc_id

    def rc_statement(self):
        """ Generate a RC statement parser that can be used to parse a RC file

        :rtype: pyparsing.ParserElement
        """

        one_line_comment = '//' + restOfLine
        comments = cStyleComment ^ one_line_comment
        precompiler = Word('#', alphanums) + restOfLine
        language_definition = "LANGUAGE" + Word(alphas + '_').setResultsName(
            "language") + Optional(',' + Word(alphas + '_').setResultsName("sublanguage"))
        block_start = (Keyword('{') | Keyword("BEGIN")).setName("block_start")
        block_end = (Keyword('}') | Keyword("END")).setName("block_end")
        reserved_words = block_start | block_end
        name_id = ~reserved_words + \
                  Word(alphas, alphanums + '_').setName("name_id")
        numbers = Word(nums)
        integerconstant = numbers ^ Combine('0x' + numbers)
        constant = Combine(
            Optional(Keyword("NOT")) + (name_id | integerconstant), adjacent=False, joinString=' ')
        combined_constants = delimitedList(constant, '|')
        block_options = Optional(SkipTo(
            Keyword("CAPTION"), failOn=block_start)("pre_caption") + Keyword("CAPTION") + quotedString(
            "caption")) + SkipTo(
            block_start)("post_caption")
        undefined_control = Group(name_id.setResultsName(
            "id_control") + delimitedList(quotedString ^ constant ^ numbers ^ Group(combined_constants)).setResultsName(
            "values_"))
        block = block_start + \
                ZeroOrMore(undefined_control)("controls") + block_end
        dialog = name_id(
            "block_id") + (Keyword("DIALOGEX") | Keyword("DIALOG"))("block_type") + block_options + block
        string_table = Keyword("STRINGTABLE")(
            "block_type") + block_options + block
        menu_item = Keyword(
            "MENUITEM")("block_type") + (commaSeparatedList("values_") | Keyword("SEPARATOR"))
        popup_block = Forward()
        popup_block <<= Group(Keyword("POPUP")("block_type") + Optional(quotedString("caption")) + block_start +
                              ZeroOrMore(Group(menu_item | popup_block))("elements") + block_end)("popups*")
        menu = name_id("block_id") + \
               Keyword("MENU")("block_type") + block_options + \
               block_start + ZeroOrMore(popup_block) + block_end
        statem = comments ^ precompiler ^ language_definition ^ dialog ^ string_table ^ menu
        return statem

    def generate_menu_pre_name(self, block_type, block_id):
        """Return the pre-name generated for elements of a menu."""
        return "%s.%s" % (block_type, block_id)

    def generate_popup_pre_name(self, pre_name, caption):
        """Return the pre-name generated for subelements of a popup.

        :param pre_name: The pre_name that already have the popup.
        :param caption: The caption (whitout quotes) of the popup.

        :return: The subelements pre-name based in the pre-name of the popup and
                 its caption.
        """
        return "%s.%s" % (pre_name, caption.replace(" ", "_"))

    def add_popup_units(self, pre_name, popup):
        """Transverses the popup tree making new units as needed."""
        for element in popup.elements:
            if element.block_type and element.block_type == "MENUITEM":
                if element.values_ and len(element.values_) >= 2:
                    var = element.values_[1]
                    if not var.isdigit():
                        self.menu_id.add(var)
                        # Else it can be a separator.
            elif element.popups:
                for sub_popup in element.popups:
                    self.add_popup_units(self.generate_popup_pre_name(pre_name, popup.caption[1:-1]), sub_popup)

    def parse(self, rcsrc):
        """Read the source of a .rc file in and include them as units."""
        # Parse the strings into a structure.
        results = self.rc_statement().searchString(rcsrc)
        for statement in results:
            if not statement.block_type:
                continue
            if statement.block_type in ("DIALOG", "DIALOGEX"):
                helper = statement.block_id[0]
                self.dialog_id.add(statement.block_id[0])
                control_type = [
                    "AUTOCHECKBOX", "AUTORADIOBUTTON", "CAPTION", "CHECKBOX"
                    , "CTEXT", "CONTROL", "DEFPUSHBUTTON", "GROUPBOX"
                    , "LTEXT", "PUSHBUTTON", "RADIOBUTTON", "RTEXT"
                    , "COMBOBOX"
                ]
                for control in statement.controls:
                    fk = (control.id_control[0] in control_type)
                    flag = (control.values_[0].startswith('"') or control.values_[0].startswith("'"))
                    if control.id_control[0] in control_type:
                        if flag:
                            self.dialog_id.add(control.values_[1])
                        else:
                            self.dialog_id.add(control.values_[0])
                continue

            if statement.block_type in ("MENU"):
                pre_name = self.generate_menu_pre_name(statement.block_type, statement.block_id[0])
                self.menu_id.add(statement.block_id[0])
                for popup in statement.popups:
                    self.add_popup_units(pre_name, popup)
                continue

            if statement.block_type in ("STRINGTABLE"):
                for text in statement.controls:
                    self.string_table_id.add(text.id_control[0])
                continue

        lines = rcsrc.splitlines()
        for line in lines:
            line = line.rstrip()
            m = re.match(r'(\w+)\s+(\bBITMAP\b|\bPNG\b|\bXML\b|\bICON\b)\s+(\".*\")$', line)
            if not m:
                continue
            self.img_id.add(m.group(1))


def main():
    x = RcParser(r'E:\tool\res\my.rc')
    print('\n'.join(sorted(x.id_by_parsing_rc())))


if __name__ == "__main__":
    main()

Upvotes: 1

Views: 147

Answers (1)

PaulMcG
PaulMcG

Reputation: 63747

Your definition of a menu is:

menu = name_id("block_id") + \
       Keyword("MENU")("block_type") + block_options + \
       block_start + ZeroOrMore(popup_block) + block_end

Within your block_start/block_end you only allow popup_blocks. In the menu that does not match, there is a menu_item in the menu that is not part of a popup_block. You may need something like:

menu = name_id("block_id") + \
       Keyword("MENU")("block_type") + block_options + \
       block_start + ZeroOrMore(popup_block | menu_item) + block_end

Upvotes: 1

Related Questions