Reputation: 12218
I'm working on a lightweight class wrapper for XML data, along the lines of http://packages.python.org/dexml/api/dexml.fields.html or https://gist.github.com/485977. The classes contain an elementTree Element and have descriptors to provide property access (Apologies for the amount of code included, I thought it would be easier to grok with most of the comments left in)
class XPropBase(object):
    '''
    Shared base for descriptors that expose part of an XML element as a
    property on the object wrapping that element.

    Subclasses implement __get__ / __set__; this class only stores the
    configuration and knows how to fetch the element from the owner.
    '''
    # Fallback converters. They are looked up through the instance, so they
    # arrive as bound methods and are then called with a single argument.
    DEFAULT_TO = (lambda self, value: str(value))
    DEFAULT_FROM = (lambda self, text: text)

    def __init__(self, path, convert_from=None, convert_to=None, read_only=False, allow_missing=False):
        '''
        @param path: location (tag path or attribute name) handled by the descriptor
        @param convert_from: callable turning the XML string into a Python value;
            defaults to returning the string unchanged
        @param convert_to: callable turning a Python value into an XML string;
            defaults to str()
        @param read_only: stored flag; enforcement is up to the subclass
        @param allow_missing: stored flag; interpretation is up to the subclass
        '''
        self.path = path
        self.read_only = read_only
        self.allow_missing = allow_missing
        self.convert_from = convert_from if convert_from else self.DEFAULT_FROM
        self.convert_to = convert_to if convert_to else self.DEFAULT_TO

    def _get_xml(self, instance):
        # Only an instance method for convenience; intended to be overridden
        # when the element lives somewhere else on the target instance.
        element = instance.get_element()
        return element
class XElement(XPropBase):
    '''
    Wraps an xml item whose content is contained in the text of
    an XML tag, ie: <tag>Content</tag>. The convert_to and convert_from
    callables are applied to the text property of the corresponding Element.

    An empty path addresses the text of the wrapped element itself.

    @note this will use the first instance of a given node path that it finds,
    so the result is not guaranteed if the supplied path leads to more than
    one xml tag.
    '''
    def __get__(self, instance, owner=None):
        '''
        Return the converted text of the addressed element.

        Returns None when the element is absent and allow_missing is set;
        raises XMLWrapperError when it is absent and allow_missing is not set.
        '''
        the_xml = self._get_xml(instance)
        if not self.path:
            return self.convert_from(the_xml.text)
        underlying = the_xml.find(self.path)
        # Test for None explicitly instead of catching AttributeError, so an
        # AttributeError raised inside convert_from is not mistaken for a
        # missing element.
        if underlying is None:
            if self.allow_missing:
                return None
            raise XMLWrapperError("%s has no element named %s" % (instance, self.path))
        return self.convert_from(underlying.text)

    def __set__(self, instance, value, owner=None):
        '''
        Store the converted value as the text of the addressed element.

        Raises XMLWrapperError for read-only properties, and for a missing
        element unless allow_missing is set, in which case a child tag named
        after the path is created.
        '''
        if self.read_only:
            raise XMLWrapperError('%s is a read-only property' % self.path)
        the_xml = self._get_xml(instance)
        if not self.path:
            the_xml.text = self.convert_to(value)
            return
        underlying = the_xml.find(self.path)
        if underlying is not None:
            underlying.text = self.convert_to(value)
        elif self.allow_missing:
            # Keyword arguments to SubElement become XML attributes, so the
            # text has to be assigned on the new element afterwards.
            # NOTE(review): the path is used verbatim as the tag name, which
            # presumably only makes sense for single-step paths - confirm.
            SubElement(the_xml, self.path).text = self.convert_to(value)
        else:
            raise XMLWrapperError("%s has no element named %s" % (instance, self.path))
class XAttrib(XPropBase):
    '''
    Wraps a property stored in an attribute of the containing xml tag; the
    attribute name is given by path.

    If the attribute is not present, an XMLWrapperError is raised unless the
    allow_missing flag is set to True.
    '''
    def __get__(self, instance, owner=None):
        '''
        Return the converted attribute value, or None when the attribute is
        absent and allow_missing is set.
        '''
        attributes = self._get_xml(instance).attrib
        # Keep the try body to the lookup only, so a KeyError raised by the
        # converter is not reported as a missing attribute.
        try:
            res = attributes[self.path]
        except KeyError:
            if self.allow_missing:
                return None
            raise XMLWrapperError("%s has no attribute named %s" % (instance, self.path))
        return self.convert_from(res)

    def __set__(self, instance, value, owner=None):
        '''
        Store the converted value in the attribute.

        Raises XMLWrapperError for read-only properties, and for an attribute
        that does not exist yet unless allow_missing is set.
        '''
        if self.read_only:
            raise XMLWrapperError('%s is a read-only property' % self.path)
        xml = self._get_xml(instance)
        # Membership test rather than a magic default string, which would
        # misfire for an attribute whose real value equals the sentinel.
        if self.path not in xml.attrib and not self.allow_missing:
            raise XMLWrapperError("%s has no attribute named %s" % (instance, self.path))
        xml.set(self.path, self.convert_to(value))

    def _get_element(self):
        # This descriptor addresses no child element.
        return None

    def _get_attribute(self):
        # The path doubles as the attribute name.
        return self.path
class XInstance(XPropBase):
    '''
    Represents a child tag which is mapped onto a wrapper class. The class is
    supplied to the constructor and is instantiated with the child element.

    @note: As with XElement, this works on the first appropriately named tag it
    finds. If there are multiple children matching the path, only the first
    one is used.
    '''
    def __init__(self, path, cls, convert_from=None, convert_to=None, read_only=False, allow_missing=False):
        '''
        @param cls: class called with the child element to build the value
        The remaining parameters are passed through to XPropBase.
        '''
        self.cls = cls
        XPropBase.__init__(self, path, convert_from=convert_from, convert_to=convert_to,
                           read_only=read_only, allow_missing=allow_missing)

    def __get__(self, instance, owner=None):
        '''
        Return cls(child) for the first child matching the path.

        Raises XMLWrapperError when there is no such child and allow_missing
        is not set.
        '''
        sub_elem = self._get_xml(instance).find(self.path)
        # Compare with None: the truth value of an Element reflects whether it
        # has children, so a childless element would look "missing".
        if sub_elem is None and not self.allow_missing:
            raise XMLWrapperError("%s has no child named %s" % (instance, self.path))
        # With allow_missing the class still receives None, as it did before.
        return self.cls(sub_elem)

    def __set__(self, instance, value):
        '''
        Replace the first child matching the path with the element wrapped by
        value. Raises XMLWrapperError for read-only properties.
        '''
        if self.read_only:
            raise XMLWrapperError('%s is a read-only property' % self.path)
        my_element = self._get_xml(instance)
        original_child = my_element.find(self.path)
        # Same truthiness pitfall as in __get__: a leaf child must be removed too.
        if original_child is not None:
            my_element.remove(original_child)
        my_element.append(self._get_xml(value))
class XInstanceGroup(XInstance):
    '''
    Represents a collection of XInstances contained below a particular tag.

    Reading builds a new list of wrapper objects on every access; writing
    replaces all children matching the path with the supplied ones.
    '''
    def __get__(self, instance, owner=None):
        '''Return a fresh list with cls(child) for every child matching the path.'''
        return [self.cls(item) for item in self._get_xml(instance).findall(self.path)]

    def __set__(self, instance, value):
        '''
        Replace every child matching the path with the elements wrapped by the
        items of value. Raises XMLWrapperError for read-only properties.
        '''
        if self.read_only:
            raise XMLWrapperError('%s is a read-only property' % self.path)
        my_element = self._get_xml(instance)
        # Resolve the new elements before touching the tree, so a bad item in
        # value fails here and leaves the existing children in place.
        new_children = [self._get_xml(item) for item in value]
        for item in my_element.findall(self.path):
            my_element.remove(item)
        for each_element in new_children:
            my_element.append(each_element)
Seems to work (though thorough testing is to come) but there is one annoying bit. The XInstanceGroup descriptor handles cases like this:
<Object name="dummy">
<Child name="kid2" blah="dee blah"/>
<Child name="kid2" blah="zey"/>
</Object>
class Kid(XMLData):
    '''Example wrapper exposing the "name" and "blah" attributes of a tag.'''
    # NOTE(review): XAttribute is not defined in the visible code (the
    # descriptor shown above is called XAttrib) - confirm the intended name.
    Name = XAttribute("name")
    Blah = XAttribute("blah")
class DummyWrapper(XMLData):
    '''Example wrapper exposing a "name" attribute and the list of Child tags.'''
    # NOTE(review): XAttribute is not defined in the visible code (the
    # descriptor shown above is called XAttrib) - confirm the intended name.
    Name = XAttribute("name")
    # Every 'Child' tag below the wrapped element is presented as a Kid.
    Kids = XInstanceGroup('Child', Kid)
So if you ask a DummyWrapper for its kids, you get a list of Kid objects. However, I'm unhappy about the process of updating that list:
# This works: fetch the list, modify it, then assign it back so that the
# descriptor's __set__ writes the children into the XML.
kids = Dummy_example.Kids
kids.append(Kid (name = 'marky mark', blah='funky_fresh'))
Dummy_example.Kids = kids
# This doesn't: the descriptor builds a new list on every access, so the
# append only changes a temporary list and never reaches the XML.
Dummy_example.Kids.append(Kid(name = 'joey fatone', blah = 'indeed!'))
This is necessitated by the fact that Dummy_example.Kids is really a function call that returns a new list each time, not a persistent list object stored as a member field.
Now for the question: Is there a way to do this using descriptors? It seems like the hurdle is that the descriptor instance can't persist the data - it only knows about the instance when it's called. I dislike the idea of somehow injecting the storage into the instances from the descriptor (if nothing else, it increases coupling unpleasantly). The obvious Googling has not helped so far.
Upvotes: 1
Views: 248
Reputation: 157
You're making an ORM! (essentially) The essential bits of this problem are
class MyListField:
    def __get__(self, instance, owner=None):
        """Placeholder: a real implementation would produce the list here."""
        return None
class MyThing:
    # Access to `foo` on an instance goes through MyListField.__get__.
    foo = MyListField()


thing = MyThing()
When you access thing.foo, it is lazily resolved and cached, so you can use it as if it were a regular list. You need a metaclass, my apologies.
from abc import ABC, abstractmethod
class BaseField(ABC):
    """Descriptor whose value is resolved on first access and then cached.

    The cache is the ``__cache__`` dict of the owning instance, keyed by the
    field name, so the same object is returned on every later access and
    in-place changes to it persist.
    """

    def __init__(self, name=None):
        self.name = name

    def __set_name__(self, owner, name):
        """Record the attribute name the field was assigned to."""
        # Called automatically at class creation (Python 3.6+), so the field
        # is named even when no metaclass assigns the name.
        self.name = name

    def _sanity_check(self, instance):
        """Raise ValueError unless the instance is a BaseThing."""
        if not isinstance(instance, BaseThing):
            raise ValueError(f"{self.__class__} can't be used on this class")

    def _get_cache(self, instance):
        """Return the per-instance cache dict after validating the instance."""
        self._sanity_check(instance)
        return instance.__cache__

    def __get__(self, instance, owner=None):
        """Return the cached value, resolving and storing it on first access.

        Accessed on the class itself (``instance`` is None), the descriptor
        object is returned instead of failing the instance check.
        """
        if instance is None:
            return self
        cache = self._get_cache(instance)
        if self.name not in cache:
            cache[self.name] = self.resolve(instance)
        return cache[self.name]

    def __set__(self, instance, value):
        """Replace the cached value for this field on the instance."""
        cache = self._get_cache(instance)
        cache[self.name] = value

    @abstractmethod
    def resolve(self, instance):
        """Compute the initial value for the instance; subclasses implement this."""
        ...
class BaseThingMeta(type):
    """Metaclass that names each BaseField after the attribute holding it."""

    def __new__(cls, name, bases, dct):
        # Walk the class namespace and tell every field which attribute it
        # was assigned to, before the class object is built.
        for attr_name in dct:
            candidate = dct[attr_name]
            if isinstance(candidate, BaseField):
                candidate.name = attr_name
        return super().__new__(cls, name, bases, dct)
class BaseThing(metaclass=BaseThingMeta):
    """Base class for objects carrying cached fields."""

    def __init__(self):
        # Per-instance storage used by the field descriptors.
        self.__cache__ = dict()
class MyListField(BaseField):
    """Example field whose initial value is a new list of 1 through 4."""

    def resolve(self, instance):
        initial = list(range(1, 5))
        return initial
class MyThing(BaseThing):
    """Example object with a single cached list field."""

    foo = MyListField()
thing = MyThing()
# The first access resolves the field and stores the list in thing.__cache__.
print(thing.foo)
# Later accesses return that same cached list, so the append is kept.
thing.foo.append(5)
print(thing.foo)
This prints:
[1, 2, 3, 4]
[1, 2, 3, 4, 5]
Upvotes: 0