Reputation: 1001
When using pyparsing to parse a file, it takes around a minute to parse Ex1 and around 15 s to parse Ex2. The only difference is that in Ex1 the variables used by pyparsing are declared as globals before the class, while in Ex2 the variables are declared inside each method individually (e.g. inside def parse_components(self)). Is this expected? If so, what's the explanation?
import modules
import pyparsing as pp
from collections import defaultdict
from multiprocessing import (Process, Manager, Event)
# GLOBALS for PyParsing (the "Ex1" variant).
# These ParserElements are constructed exactly once, at module import time,
# and are then shared by every parse_* method of the DEF class below.
EOL = pp.LineEnd().suppress()  # end of line, suppressed from the results
linebreak = pp.Suppress(";" + pp.LineEnd())  # DEF statement terminator ";\n"
# Characters allowed in a DEF identifier.
# NOTE(review): the curly quotes '“' and '‘' look like copy/paste mojibake
# of '"' and "'" — confirm against the original source.  Also note that
# '[\]' inside this literal is just the three characters '[', '\', ']'
# (backslash-] is not an escape in a regular string), and several
# characters ('.', '/', '$', '[', ']') are duplicated; pp.Word treats the
# string as a character set, so duplicates are harmless.
identifier = pp.Word(pp.alphanums + '._“!<>/[]$#$%&‘*+,/:<=>?@[\]^_`{|}~')
number = pp.pyparsing_common.number  # matches int or float
word = pp.Word(pp.alphas)
LPAR = pp.Suppress('(')
RPAR = pp.Suppress(')')
# Component orientation keywords: the four cardinal directions plus the
# flipped ("F") variants.
ORIENT = (pp.Keyword('N')
          | pp.Keyword('S')
          | pp.Keyword('E')
          | pp.Keyword('W')
          | pp.Keyword('FN')
          | pp.Keyword('FS')
          | pp.Keyword('FE')
          | pp.Keyword('FW'))
# A parenthesized point; '*' may stand in for a coordinate.
pt = LPAR + pp.OneOrMore(number | pp.Keyword('*')) + RPAR  # pair of x,y
class DEF():
    """Parser for selected sections of a .DEF (Design Exchange Format) file.

    "Ex1" variant: the pyparsing grammar fragments (identifier, number,
    ORIENT, ...) are module-level globals built once at import time.
    Section groups listed in ``sections_grp`` are parsed in parallel worker
    processes; results are merged through a ``multiprocessing.Manager``
    dict and then dispatched to ``handle_<section>`` methods.

    NOTE(review): several names used below (``modules``, ``Base``,
    ``self.def_file_design``, the ``handle_*`` methods, EEQMASTER, SOURCE,
    PLACEMENT, ...) are defined in code elided from this excerpt.
    """

    def __init__(self, Base):
        # Recursive defaultdict factory: mydict()['a']['b']['c'] autovivifies
        # arbitrarily nested dictionaries.
        self.mydict = lambda: defaultdict(self.mydict)
        ...
        ...
        self.ignore_nets = True
        self.ignore_nets_route = False
        # Each list is a new process. Careful with dependencies.
        # eg. 'dbuPerMicron' must be executed before the others, but it can be after 'design'
        self.sections_grp = [['design', 'dbuPerMicron', 'diearea'],
                             ['components'],
                             ]
        if not self.ignore_nets:
            self.sections_grp.append(['nets'])
        # Total number of sections across all groups.
        self.n_elems_sections_grp = sum([len(x) for x in self.sections_grp])
        self.counter = modules.SharedCounter(0)  # counter shared across workers
        # events[0] gates parse_components; it is presumably set() elsewhere
        # (the setter is not visible in this excerpt) — TODO confirm.
        self.events = [Event()]
        self.design = ''
        modules.debug_log.print_repr([self.__repr__()])

    #
    def run(self):
        """Read every configured .DEF file and parse it.

        NOTE(review): ``self.def_file_design`` is assigned in code not
        shown here (probably in the elided part of ``__init__``).
        """
        for curr_file in self.def_file_design:
            ifile = open(curr_file,'r')
            file_string = ifile.read()
            ifile.close()
            self.parse_all(file_string)

    # Create a process for each section group to parse
    def parse_all(self, file_string):
        """Fan out one worker process per section group, then dispatch results."""
        manager = Manager()
        shared_dict = manager.dict()  # collects parse results from all workers
        jobs = []
        for sections in self.sections_grp:
            p = Process(target=self.parse_sections, args=(sections, file_string, shared_dict))
            jobs.append(p)
            p.start()
        # Wait for the workers to finish
        for job in jobs:
            job.join()
        # Hand each parsed section to its handle_<section> method
        # (the handlers are defined outside this excerpt).
        for sections in self.sections_grp:
            for section in sections:
                getattr(self, 'handle_' + section)(shared_dict)

    # Worker body: parse every section of one group of self.sections_grp
    def parse_sections(self, sections, file_string, shared_dict):
        """Run parse_<section>() for each section and store the first match."""
        for section in sections:
            to_parse = getattr(self, 'parse_' + section)
            # scanString yields (tokens, start, end); only the FIRST match of
            # each section is kept — hence the break.
            for t, s, e in to_parse().scanString(file_string):
                shared_dict.update(t.asDict())
                break

    # Parse the DESIGN section of a .DEF file
    def parse_design(self):
        ...
        return design  # NOTE(review): defined in the elided lines above

    # Parse the UNITS DISTANCE MICRONS section of a .DEF file
    def parse_dbuPerMicron(self):
        ...
        return dbuPerMicron  # NOTE(review): defined in the elided lines above

    # Parse the DIEAREA section of a .DEF file
    def parse_diearea(self):
        ...
        return diearea  # NOTE(review): defined in the elided lines above

    # Parse the COMPONENTS section of a .DEF file
    def parse_components(self):
        """Build and return the pyparsing grammar for the COMPONENTS section.

        Blocks on events[0] before building, and relies on the module-level
        grammar fragments (identifier, number, linebreak, ...).
        """
        self.events[0].wait()  # Wait for event[0] to finish
        components_id = pp.Keyword('COMPONENTS')
        end_components_id = pp.Keyword("END COMPONENTS").suppress()
        begin_comp = pp.Suppress(pp.Keyword('-'))  # each component entry starts with '-'
        ws_comp = pp.Suppress(pp.Keyword('+'))  # parameter division in components
        # compName: instance name followed by its cell (master) name
        compName = (identifier('comp_name') + identifier('cell')
                    ).setResultsName('compName')
        ...
        ...
        ...
        # One component entry.  EEQMASTER, SOURCE, PLACEMENT, MASKSHIFT, HALO,
        # ROUTEHALO, WEIGHT, REGION and PROPERTY are defined in the elided
        # lines above.
        subcomponent = pp.Group(begin_comp
                                + compName
                                + pp.Optional(EEQMASTER)
                                + pp.Optional(SOURCE)  # & because it can be in any order
                                + pp.Optional(PLACEMENT)
                                + pp.Optional(MASKSHIFT)
                                + pp.Optional(HALO)
                                + pp.Optional(ROUTEHALO)
                                + pp.Optional(WEIGHT)
                                + pp.Optional(REGION)
                                + pp.ZeroOrMore(PROPERTY)
                                + pp.Suppress(linebreak)
                                ).setResultsName('subcomponents', listAllMatches=True)
        # Whole section: "COMPONENTS <numComps> ; <entries...> END COMPONENTS"
        components = pp.Group(pp.Suppress(components_id)
                              + number('numComps')
                              + pp.Suppress(linebreak)
                              + pp.OneOrMore(subcomponent)
                              + pp.Suppress(end_components_id)
                              ).setResultsName('COMPONENTS')
        return components
import modules
import pyparsing as pp
from collections import defaultdict
from multiprocessing import (Process, Manager, Event)
class DEF():
    """Parser for selected sections of a .DEF (Design Exchange Format) file.

    "Ex2" variant: unlike Ex1, the pyparsing grammar fragments (identifier,
    number, ORIENT, ...) are rebuilt locally inside each parse_* method on
    every call instead of being module-level globals.  Section groups
    listed in ``sections_grp`` are parsed in parallel worker processes;
    results are merged through a ``multiprocessing.Manager`` dict and then
    dispatched to ``handle_<section>`` methods.

    NOTE(review): several names used below (``modules``, ``Base``,
    ``self.def_file_design``, the ``handle_*`` methods, EEQMASTER, SOURCE,
    PLACEMENT, ...) are defined in code elided from this excerpt.
    """

    def __init__(self, Base):
        # Recursive defaultdict factory: mydict()['a']['b']['c'] autovivifies
        # arbitrarily nested dictionaries.
        self.mydict = lambda: defaultdict(self.mydict)
        ...
        ...
        self.ignore_nets = True
        self.ignore_nets_route = False
        # Each list is a new process. Careful with dependencies.
        # eg. 'dbuPerMicron' must be executed before the others, but it can be after 'design'
        self.sections_grp = [['design', 'dbuPerMicron', 'diearea'],
                             ['components'],
                             ]
        if not self.ignore_nets:
            self.sections_grp.append(['nets'])
        # Total number of sections across all groups.
        self.n_elems_sections_grp = sum([len(x) for x in self.sections_grp])
        self.counter = modules.SharedCounter(0)  # counter shared across workers
        # events[0] gates parse_components; it is presumably set() elsewhere
        # (the setter is not visible in this excerpt) — TODO confirm.
        self.events = [Event()]
        self.design = ''
        modules.debug_log.print_repr([self.__repr__()])

    #
    def run(self):
        """Read every configured .DEF file and parse it.

        NOTE(review): ``self.def_file_design`` is assigned in code not
        shown here (probably in the elided part of ``__init__``).
        """
        for curr_file in self.def_file_design:
            ifile = open(curr_file,'r')
            file_string = ifile.read()
            ifile.close()
            self.parse_all(file_string)

    # Create a process for each section group to parse
    def parse_all(self, file_string):
        """Fan out one worker process per section group, then dispatch results."""
        manager = Manager()
        shared_dict = manager.dict()  # collects parse results from all workers
        jobs = []
        for sections in self.sections_grp:
            p = Process(target=self.parse_sections, args=(sections, file_string, shared_dict))
            jobs.append(p)
            p.start()
        # Wait for the workers to finish
        for job in jobs:
            job.join()
        # Hand each parsed section to its handle_<section> method
        # (the handlers are defined outside this excerpt).
        for sections in self.sections_grp:
            for section in sections:
                getattr(self, 'handle_' + section)(shared_dict)

    # Worker body: parse every section of one group of self.sections_grp
    def parse_sections(self, sections, file_string, shared_dict):
        """Run parse_<section>() for each section and store the first match."""
        for section in sections:
            to_parse = getattr(self, 'parse_' + section)
            # scanString yields (tokens, start, end); only the FIRST match of
            # each section is kept — hence the break.
            for t, s, e in to_parse().scanString(file_string):
                shared_dict.update(t.asDict())
                break

    # Parse the DESIGN section of a .DEF file
    def parse_design(self):
        ...
        return design  # NOTE(review): defined in the elided lines above

    # Parse the UNITS DISTANCE MICRONS section of a .DEF file
    def parse_dbuPerMicron(self):
        ...
        return dbuPerMicron  # NOTE(review): defined in the elided lines above

    # Parse the DIEAREA section of a .DEF file
    def parse_diearea(self):
        ...
        return diearea  # NOTE(review): defined in the elided lines above

    # Parse the COMPONENTS section of a .DEF file
    def parse_components(self):
        """Build and return the pyparsing grammar for the COMPONENTS section.

        Ex2 variant: all grammar fragments are (re)constructed locally on
        every call to this method, rather than taken from module globals.
        Blocks on events[0] before building.
        """
        self.events[0].wait()  # Wait for event[0] to finish
        EOL = pp.LineEnd().suppress()  # end of line, suppressed from the results
        linebreak = pp.Suppress(";" + pp.LineEnd())  # DEF statement terminator ";\n"
        # Characters allowed in a DEF identifier.
        # NOTE(review): the curly quotes '“' and '‘' look like copy/paste
        # mojibake of '"' and "'" — confirm against the original source.
        identifier = pp.Word(pp.alphanums + '._“!<>/[]$#$%&‘*+,/:<=>?@[\]^_`{|}~')
        number = pp.pyparsing_common.number  # matches int or float
        word = pp.Word(pp.alphas)
        LPAR = pp.Suppress('(')
        RPAR = pp.Suppress(')')
        # Component orientation keywords: cardinal directions plus flipped variants.
        ORIENT = (pp.Keyword('N')
                  | pp.Keyword('S')
                  | pp.Keyword('E')
                  | pp.Keyword('W')
                  | pp.Keyword('FN')
                  | pp.Keyword('FS')
                  | pp.Keyword('FE')
                  | pp.Keyword('FW'))
        # A parenthesized point; '*' may stand in for a coordinate.
        pt = LPAR + pp.OneOrMore(number | pp.Keyword('*')) + RPAR  # pair of x,y
        components_id = pp.Keyword('COMPONENTS')
        end_components_id = pp.Keyword("END COMPONENTS").suppress()
        begin_comp = pp.Suppress(pp.Keyword('-'))  # each component entry starts with '-'
        ws_comp = pp.Suppress(pp.Keyword('+'))  # parameter division in components
        # compName: instance name followed by its cell (master) name
        compName = (identifier('comp_name') + identifier('cell')
                    ).setResultsName('compName')
        ...
        ...
        ...
        # One component entry.  EEQMASTER, SOURCE, PLACEMENT, MASKSHIFT, HALO,
        # ROUTEHALO, WEIGHT, REGION and PROPERTY are defined in the elided
        # lines above.
        subcomponent = pp.Group(begin_comp
                                + compName
                                + pp.Optional(EEQMASTER)
                                + pp.Optional(SOURCE)  # & because it can be in any order
                                + pp.Optional(PLACEMENT)
                                + pp.Optional(MASKSHIFT)
                                + pp.Optional(HALO)
                                + pp.Optional(ROUTEHALO)
                                + pp.Optional(WEIGHT)
                                + pp.Optional(REGION)
                                + pp.ZeroOrMore(PROPERTY)
                                + pp.Suppress(linebreak)
                                ).setResultsName('subcomponents', listAllMatches=True)
        # Whole section: "COMPONENTS <numComps> ; <entries...> END COMPONENTS"
        components = pp.Group(pp.Suppress(components_id)
                              + number('numComps')
                              + pp.Suppress(linebreak)
                              + pp.OneOrMore(subcomponent)
                              + pp.Suppress(end_components_id)
                              ).setResultsName('COMPONENTS')
        return components
Upvotes: 2
Views: 115
Reputation: 4427
The likely culprit seems to be staring at you right here:
... in Ex1 the variables used by PyParsing are declared as globals before the class.
from multiprocessing import (Process, Manager, Event)
Multiprocessing may be re-importing the module (and thus rebuilding those globals) or otherwise interacting with them in a surprising way. Is DEF.sections_grp
always a single list (i.e. one worker process) in your timing tests?
Upvotes: 1