diff --git a/html/__init__.py b/html/__init__.py
index c6ebbbb..af56099 100644
--- a/html/__init__.py
+++ b/html/__init__.py
@@ -2,13 +2,20 @@
"""
Wrapper methods used for mapping HTML to docx objects
+Obtained from: https://github.com/fokoenecke/html_docx
"""
+import re
+
from lxml.html import fromstring
-from mindboard.helpers.docx.html.converter import DocxBuilder
+from converter import DocxBuilder
def add_html(container, html_string):
+
+ # NOTE: Added for backward compatibility with line breaks in text
+ html_string = re.sub('\n', '<br/>', html_string)
+
root = fromstring(html_string)
builder = DocxBuilder(container=container)
builder.from_html_tree(root=root)
\ No newline at end of file
diff --git a/html/converter.py b/html/converter.py
index 2e51a2b..50a0883 100644
--- a/html/converter.py
+++ b/html/converter.py
@@ -5,7 +5,7 @@
mapping HTML tags to their corresponding python-docx functions.
Appending full HTML structure to the given document.
"""
-from mindboard.helpers.docx.html.dispatcher import get_tag_dispatcher
+from dispatcher import get_tag_dispatcher
class DocxBuilder(object):
@@ -38,6 +38,8 @@ def _append_docx_elements(self, html_element, container):
for child in children:
self._append_docx_elements(child, new_container)
- dispatcher = get_tag_dispatcher(html_element.getparent().tag)
+ parent = html_element.getparent()
+ if parent is not None:
+ dispatcher = get_tag_dispatcher(parent.tag)
if html_element.tail and dispatcher:
dispatcher.append_tail(html_element, container)
diff --git a/html/dispatcher.py b/html/dispatcher.py
index 30919d9..e062a77 100644
--- a/html/dispatcher.py
+++ b/html/dispatcher.py
@@ -5,15 +5,17 @@
the different docx elements.
"""
-from mindboard.helpers.docx.html.tag_dispatchers.blockquote import BlockquoteDispatcher
-from mindboard.helpers.docx.html.tag_dispatchers.code import CodeDispatcher
-from mindboard.helpers.docx.html.tag_dispatchers.emphasis import EmphasisDispatcher
-from mindboard.helpers.docx.html.tag_dispatchers.heading import HeadingDispatcher
-from mindboard.helpers.docx.html.tag_dispatchers.linebreak import LineBreakDispatcher
-from mindboard.helpers.docx.html.tag_dispatchers.link import LinkDispatcher
-from mindboard.helpers.docx.html.tag_dispatchers.list_item import ListItemDispatcher
-from mindboard.helpers.docx.html.tag_dispatchers.paragraph import ParagraphDispatcher
-from mindboard.helpers.docx.html.tag_dispatchers.strong import StrongDispatcher
+from tag_dispatchers.blockquote import BlockquoteDispatcher
+from tag_dispatchers.code import CodeDispatcher
+from tag_dispatchers.emphasis import EmphasisDispatcher
+from tag_dispatchers.heading import HeadingDispatcher
+from tag_dispatchers.linebreak import LineBreakDispatcher
+from tag_dispatchers.link import LinkDispatcher
+from tag_dispatchers.list_item import ListItemDispatcher
+from tag_dispatchers.paragraph import ParagraphDispatcher
+from tag_dispatchers.strong import StrongDispatcher
+from tag_dispatchers.underline import UnderlineDispatcher
+from tag_dispatchers.div import DivDispatcher
def get_tag_dispatcher(html_tag):
@@ -31,8 +33,12 @@ def get_tag_dispatcher(html_tag):
a=LinkDispatcher(),
li=ListItemDispatcher(),
br=LineBreakDispatcher(),
+ div=DivDispatcher(),
code=CodeDispatcher(),
strong=StrongDispatcher(),
+ b=StrongDispatcher(),
+ i=EmphasisDispatcher(),
+ u=UnderlineDispatcher(),
em=EmphasisDispatcher(),
h1=heading_dispatcher,
h2=heading_dispatcher,
diff --git a/html/tag_dispatchers/__init__.py b/html/tag_dispatchers/__init__.py
index 9c5853f..fa687fc 100644
--- a/html/tag_dispatchers/__init__.py
+++ b/html/tag_dispatchers/__init__.py
@@ -55,4 +55,19 @@ def replace_whitespaces(text):
text = ' '.join(text.split('\n'))
text = re.sub(' +', ' ', text)
- return text if text else ''
\ No newline at end of file
+ return text if text else ''
+
+
+# From: http://stackoverflow.com/questions/3170055/test-if-lists-share-any-items-in-python
+def lists_overlap(a, b):
+    """Return True if at least one item of ``a`` also occurs in ``b`` (items must be hashable)."""
+    return not set(b).isdisjoint(a)
+
+
+def get_parental_tags(temp_element):
+    """Return the tags of temp_element's ancestors, nearest first (so bold+underline+italics nest correctly)."""
+    tags, parent = [], temp_element.getparent()
+    while parent is not None:  # explicit None test: lxml element truthiness is deprecated
+        tags.append(parent.tag)
+        parent = parent.getparent()
+    return tags
diff --git a/html/tag_dispatchers/blockquote.py b/html/tag_dispatchers/blockquote.py
index fa29200..630a4d0 100644
--- a/html/tag_dispatchers/blockquote.py
+++ b/html/tag_dispatchers/blockquote.py
@@ -1,5 +1,5 @@
# encoding: utf-8
-from mindboard.helpers.docx.html.tag_dispatchers import TagDispatcher
+from ..tag_dispatchers import TagDispatcher
class BlockquoteDispatcher(TagDispatcher):
diff --git a/html/tag_dispatchers/code.py b/html/tag_dispatchers/code.py
index d7e8b68..82d9f72 100644
--- a/html/tag_dispatchers/code.py
+++ b/html/tag_dispatchers/code.py
@@ -1,5 +1,5 @@
# encoding: utf-8
-from mindboard.helpers.docx.html.tag_dispatchers import TagDispatcher
+from ..tag_dispatchers import TagDispatcher
class CodeDispatcher(TagDispatcher):
diff --git a/html/tag_dispatchers/div.py b/html/tag_dispatchers/div.py
new file mode 100644
index 0000000..e03e841
--- /dev/null
+++ b/html/tag_dispatchers/div.py
@@ -0,0 +1,33 @@
+# encoding: utf-8
+from ..tag_dispatchers import TagDispatcher, replace_whitespaces
+
+
+class DivDispatcher(TagDispatcher):
+ def __init__(self):  # sets no state of its own; only delegates to the parent initializer
+ super(DivDispatcher, self).__init__()
+
+    @classmethod
+    def append_head(cls, element, container):
+        target = cls.get_new_paragraph(container)  # element.text goes into the paragraph get_new_paragraph() returns
+        return cls._append_div(element.text, element, target)
+
+    @classmethod
+    def append_tail(cls, element, container):
+        target = cls.get_current_paragraph(container)  # element.tail goes into the paragraph get_current_paragraph() returns
+        return cls._append_div(element.tail, element, target)
+
+ @classmethod
+ def _append_div(cls, text, element, container):
+ """
+