Extending PyDocX
Customizing the HTML Exporter
Basic HTML exporting
is implemented in
pydocx.export.html.PyDocXHTMLExporter.
To override default behavior,
simply extend the class
and implement the desired methods.
Here are a few examples
to show you what is possible:
from pydocx.export.html import PyDocXHTMLExporter


class MyPyDocXHTMLExporter(PyDocXHTMLExporter):
    """Example HTML exporter that overrides selected default behaviors.

    ``wordprocessing`` and ``HtmlTag`` are assumed to be imported from
    PyDocX already; their imports are not shown in this example.
    """

    def __init__(self, path):
        # Handle dstrike the same as italic. The alias is installed
        # before the base initializer runs.
        self.export_run_property_dstrike = self.export_run_property_italic
        super(MyPyDocXHTMLExporter, self).__init__(path=path)

    # Perform specific pre-processing
    def export(self):
        self.delete_only_FOO_text_nodes()
        return super(MyPyDocXHTMLExporter, self).export()

    def delete_only_FOO_text_nodes(self):
        """Delete all text nodes that match 'FOO' exactly.

        Only text nodes sitting directly inside a run, inside a paragraph
        that is a direct child of the document body, are considered.
        """
        document = self.main_document_part.document
        for paragraph in document.body.children:
            if not isinstance(paragraph, wordprocessing.Paragraph):
                continue
            for run in paragraph.children:
                if not isinstance(run, wordprocessing.Run):
                    continue
                # Iterate over a copy: matching nodes are removed from
                # run.children while looping.
                for node in run.children[:]:
                    is_text = isinstance(node, wordprocessing.Text)
                    if is_text and node.text == 'FOO':
                        run.children.remove(node)

    # Don't display head
    def head(self):
        return
        # The exporter expects all methods to return a generator
        yield  # this syntax causes an empty generator to be returned

    # Ignore page break
    def get_break_tag(self, br):
        if br.is_page_break():
            # No tag for page breaks.
            return None
        return super(MyPyDocXHTMLExporter, self).get_break_tag(br)

    # Do not show deleted runs
    def export_deleted_run(self, deleted_run):
        return
        yield

    # Custom table tag
    def get_table_tag(self, table):
        # 'class' is a Python keyword, so the attribute is passed through
        # a dict rather than as a literal keyword argument.
        attrs = {
            'class': 'awesome-table',
        }
        return HtmlTag('table', **attrs)

    # By default, the HTML exporter wraps inserted runs in a span with
    # class="pydocx-insert". This example overrides that method to skip
    # that behavior by jumping to the base implementation.
    def export_inserted_run(self, inserted_run):
        # The first argument to super() is the class *after which* the
        # method lookup starts. Naming PyDocXHTMLExporter skips its
        # override and reaches the implementation it inherits.
        return super(PyDocXHTMLExporter, self).export_inserted_run(
            inserted_run,
        )

    # Hide hidden runs
    def export_run(self, run):
        properties = run.effective_properties
        if properties.vanish or properties.hidden:
            # A bare return inside a generator yields nothing.
            return
        results = super(MyPyDocXHTMLExporter, self).export_run(run)
        for result in results:
            yield result
Implementing a new exporter
If you want to implement an exporter
for an unsupported markup language,
you can do that
by extending
pydocx.export.base.PyDocXExporter
as needed.
For example,
this shows how you might
create a custom exporter
for the FML,
or Foo Markup Language:
class PyDocXFOOExporter(PyDocXExporter):
    """Example exporter for a made-up "FOO" markup language.

    Every export method is a generator that yields output fragments.
    The wrapping methods emit an opening marker, re-yield whatever the
    base implementation produces, then emit a closing marker.
    """

    # The "FOO" markup language denotes breaks using "\"
    def export_break(self, br=None):
        # ``br`` is optional so the method works whether or not the
        # caller passes the break node -- presumably it does, like the
        # other export_* methods; confirm against the base exporter.
        yield '\\'

    def export_document(self, document):
        yield 'START OF DOC'
        # super() already binds self; passing it again would shift the
        # arguments and raise TypeError.
        results = super(PyDocXFOOExporter, self).export_document(document)
        for result in results:
            yield result
        yield 'END OF DOC'

    # Text must be wrapped in ()
    def export_text(self, text):
        yield '({0})'.format(text.text)

    # Tables are denoted by [ ]
    def export_table(self, table):
        yield '['
        results = super(PyDocXFOOExporter, self).export_table(table)
        for result in results:
            yield result
        yield ']'

    # Table rows are denoted by { }
    def export_table_row(self, table_row):
        yield '{'
        results = super(PyDocXFOOExporter, self).export_table_row(table_row)
        for result in results:
            yield result
        yield '}'

    # Table cells are denoted by < >
    def export_table_cell(self, table_cell):
        yield '<'
        results = super(PyDocXFOOExporter, self).export_table_cell(table_cell)
        for result in results:
            yield result
        yield '>'
The base exporter implementation
expects all methods
to return a generator.
For this reason,
it is not possible
to have an empty
method (pass)
or have a method
that just returns None.
The one caveat
is the special syntax
that causes a method
to return an empty
generator:
def empty_generator():
    """Return a generator that yields nothing."""
    return
    # Never reached, but the presence of ``yield`` is what makes this
    # function a generator function instead of one returning None.
    yield
This implementation is consistent with the “only generators” rule, and is actually computationally faster than returning an empty list.