But sometimes they can be too literal. There are options to deal with this, such as using an ellipsis (...) as a wildcard. These are not enough to deal with XML output, though:
File "/home/lsoto/src/django.doj/tests/modeltests/model_forms/models.py" [...]
Failed example:
print f['name']
Expected:
<input id="id_name" type="text" name="name" maxlength="20" />
Got:
<input name="name" id="id_name" type="text" maxlength="20" />
(that's an actual failure from the Django test suite running on Jython)
To solve this, I implemented a doctest OutputChecker, inspired by (but not as polished as) lxml.doctestcompare. The plus side? The code uses the xml.dom.minidom stdlib API (instead of ElementTree or lxml), so it works without any third-party library. Here is the core code:
def check_output_xml(self, want, got, optionsflags):
    """Return True when ``want`` and ``got`` are equivalent XML fragments.

    Tries to do an XML comparison of ``want`` and ``got``.  Plain string
    comparison doesn't always work because, for example, attribute
    ordering should not be important.

    Both strings are parsed with ``xml.dom.minidom`` and compared element
    by element: tag name, attributes (as an unordered mapping), the
    whitespace-normalized text found directly inside the element and,
    recursively, the child elements in document order.

    Based on http://codespeak.net/svn/lxml/trunk/src/lxml/doctestcompare.py

    ``want`` is the expected output and ``got`` the actual output, both
    as strings.  ``optionsflags`` is not used by this method.

    Returns False when ``want`` does not look like markup, when either
    string cannot be parsed as XML, or when the parsed trees differ.
    """
    # We use this to distinguish repr()s (e.g. '<Foo object at 0x...>')
    # from elements:
    _repr_re = re.compile(r'^<[^>]+ (at|object) ')
    # Only runs of two or more whitespace characters are collapsed.
    _norm_whitespace_re = re.compile(r'[ \t\n][ \t\n]+')

    def norm_whitespace(v):
        return _norm_whitespace_re.sub(' ', v)

    def looks_like_markup(s):
        s = s.strip()
        return (s.startswith('<')
                and not _repr_re.search(s))

    def is_quoted_string(s):
        s = s.strip()
        return (len(s) >= 2
                and s[0] == s[-1]
                and s[0] in ('"', "'"))

    def is_quoted_unicode(s):
        s = s.strip()
        return (len(s) >= 3
                and s[0] == 'u'
                and s[1] == s[-1]
                and s[1] in ('"', "'"))

    def child_text(element):
        # Only text nodes that are direct children of the element.
        return ''.join([c.data for c in element.childNodes
                        if c.nodeType == Node.TEXT_NODE])

    def children(element):
        return [c for c in element.childNodes
                if c.nodeType == Node.ELEMENT_NODE]

    def norm_child_text(element):
        return norm_whitespace(child_text(element))

    def attrs_dict(element):
        # A dict makes the attribute comparison order-independent.
        return dict(element.attributes.items())

    def check_element(want_element, got_element):
        if want_element.tagName != got_element.tagName:
            return False
        if norm_child_text(want_element) != norm_child_text(got_element):
            return False
        if attrs_dict(want_element) != attrs_dict(got_element):
            return False
        want_children = children(want_element)
        got_children = children(got_element)
        if len(want_children) != len(got_children):
            return False
        for want_child, got_child in zip(want_children, got_children):
            if not check_element(want_child, got_child):
                return False
        return True

    # Strip quotes
    if is_quoted_string(want) and is_quoted_string(got):
        want = want.strip()[1:-1]
        got = got.strip()[1:-1]
    elif is_quoted_unicode(want) and is_quoted_unicode(got):
        want = want.strip()[2:-1]
        got = got.strip()[2:-1]

    if not looks_like_markup(want):
        return False

    # Wrapper to support XML fragments (several top-level elements)
    wrapper = u"<root>%s</root>"
    try:
        want_root = parseString(wrapper % want).firstChild
        got_root = parseString(wrapper % got).firstChild
    except Exception:
        # Any parse failure means "not comparable as XML".  Unlike a bare
        # ``except:``, this lets KeyboardInterrupt/SystemExit propagate.
        return False
    return check_element(want_root, got_root)
Note that, as is, it doesn't support HTML. That's not a problem for the Django test suite (where everything is XHTML), but it would be nice to add such support and then submit the checker upstream to CPython's doctest.py, so that we could do something like this everywhere:
>>> print f['name'] # doctest: +XML
<input id="id_name" type="text" name="name" maxlength="20" />
Next target: JSON output!
Update: I've submitted a patch to Django with support for both XML and JSON output.

