Skip to content

Commit 8c83b14

Browse files
aarbouin authored and alexysdussier committed
Support VML image size in shape
1 parent 569826e commit 8c83b14

File tree

2 files changed: +92 -16 lines changed

2 files changed: +92 -16 lines changed

mammoth/docx/body_xml.py

Lines changed: 28 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010
from .styles_xml import Styles
1111
from .uris import replace_fragment, uri_to_zip_entry_name
1212

13-
EMU_TO_PIXEL = 1 / 9525
13+
EMU_PER_PIXEL = 9525
1414

1515
if sys.version_info >= (3, ):
1616
unichr = chr
@@ -404,7 +404,7 @@ def inline(element):
404404
return _read_blips(blips, alt_text, size)
405405

406406
def _emu_to_pixel(emu):
407-
return round(int(emu) * EMU_TO_PIXEL)
407+
return round(int(emu) / EMU_PER_PIXEL)
408408

409409
def _read_blips(blips, alt_text, size):
410410
return _ReadResult.concat(lists.map(lambda blip: _read_blip(blip, alt_text, size), blips))
@@ -454,14 +454,37 @@ def open_image():
454454

455455
return image_path, open_image
456456

457-
def read_imagedata(element):
457+
def shape(element):
458+
if len(element.children) == 1:
459+
imagedata = element.find_child("v:imagedata")
460+
if imagedata:
461+
size = _read_shape_size(element)
462+
return read_imagedata(imagedata, size)
463+
return read_child_elements(element)
464+
465+
def _read_shape_size(element):
466+
style_attribute = element.attributes.get("style")
467+
if not style_attribute:
468+
return None
469+
style = style_attribute.split(";")
470+
width = _extract_size_from_style("width", style)
471+
height = _extract_size_from_style("height", style)
472+
size = documents.Size(width=width, height=height)
473+
return size
474+
475+
def _extract_size_from_style(style_name, style):
476+
with_column = "{}:".format(style_name)
477+
raw_size = next(iter(filter(lambda s: s.startswith(with_column), style)))
478+
return raw_size.replace(with_column, "")
479+
480+
def read_imagedata(element, style=None):
458481
relationship_id = element.attributes.get("r:id")
459482
if relationship_id is None:
460483
warning = results.warning("A v:imagedata element without a relationship ID was ignored")
461484
return _empty_result_with_message(warning)
462485
else:
463486
title = element.attributes.get("o:title")
464-
return _read_image(lambda: _find_embedded_image(relationship_id), title)
487+
return _read_image(lambda: _find_embedded_image(relationship_id), title, style)
465488

466489
def note_reference_reader(note_type):
467490
def note_reference(element):
@@ -496,7 +519,7 @@ def read_sdt(element):
496519
"v:group": read_child_elements,
497520
"v:rect": read_child_elements,
498521
"v:roundrect": read_child_elements,
499-
"v:shape": read_child_elements,
522+
"v:shape": shape,
500523
"v:textbox": read_child_elements,
501524
"w:txbxContent": read_child_elements,
502525
"w:pict": pict,

tests/docx/body_xml_tests.py

Lines changed: 64 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
import sys
33

44
from precisely import assert_that, is_sequence
5-
from nose.tools import istest, assert_equal
5+
from nose.tools import istest, assert_equal, assert_is_none
66
from nose_parameterized import parameterized, param
77
import funk
88

@@ -885,39 +885,92 @@ class ImageTests(object):
885885
IMAGE_RELATIONSHIP_ID = "rId5"
886886

887887
def _read_embedded_image(self, element):
888+
return self._read_embedded_images(element)[0]
889+
890+
def _read_embedded_images(self, element):
888891
relationships = Relationships([
889892
_image_relationship(self.IMAGE_RELATIONSHIP_ID, "media/hat.png"),
890893
])
891-
892894
mocks = funk.Mocks()
893895
docx_file = mocks.mock()
894896
funk.allows(docx_file).open("word/media/hat.png").returns(io.BytesIO(self.IMAGE_BYTES))
895-
896897
content_types = mocks.mock()
897898
funk.allows(content_types).find_content_type("word/media/hat.png").returns("image/png")
898-
899-
return _read_and_get_document_xml_element(
899+
return _read_and_get_document_xml_elements(
900900
element,
901901
content_types=content_types,
902902
relationships=relationships,
903903
docx_file=docx_file,
904904
)
905905

906906
@istest
907-
def can_read_imagedata_elements_with_rid_attribute(self):
908-
imagedata_element = xml_element("v:imagedata", {
909-
"r:id": self.IMAGE_RELATIONSHIP_ID,
910-
"o:title": "It's a hat"
911-
})
907+
def can_read_shape_elements_with_rid_and_size_attributes(self):
908+
shape_element = xml_element("v:shape", {"style": "width:31.5pt;height:38.25pt"}, [
909+
xml_element("v:imagedata", {
910+
"r:id": self.IMAGE_RELATIONSHIP_ID,
911+
"o:title": "It's a hat"
912+
})
913+
])
912914

913-
image = self._read_embedded_image(imagedata_element)
915+
image = self._read_embedded_image(shape_element)
914916

915917
assert_equal(documents.Image, type(image))
916918
assert_equal("It's a hat", image.alt_text)
917919
assert_equal("image/png", image.content_type)
920+
assert_equal(documents.Size(width="31.5pt", height="38.25pt"), image.size)
918921
with image.open() as image_file:
919922
assert_equal(self.IMAGE_BYTES, image_file.read())
920923

924+
@istest
925+
def cannot_resize_shape_with_multiple_nodes(self):
926+
shape_element = xml_element("v:shape", {"style": "width:31.5pt;height:38.25pt"}, [
927+
xml_element("v:imagedata", {
928+
"r:id": self.IMAGE_RELATIONSHIP_ID,
929+
"o:title": "It's a hat"
930+
}),
931+
xml_element("v:textbox", {}, [
932+
xml_element("w:txbxContent", {}, [
933+
_paragraph_with_style_id("textbox-content")
934+
])
935+
])
936+
])
937+
938+
nodes = self._read_embedded_images(shape_element)
939+
940+
assert_equal(2, len(nodes))
941+
image_node = nodes[0]
942+
assert_equal(documents.Image, type(image_node))
943+
assert_equal("It's a hat", image_node.alt_text)
944+
assert_is_none(image_node.size)
945+
946+
@istest
947+
def can_read_shape_elements_with_unused_style_elements(self):
948+
shape_element = xml_element("v:shape", {"style": "width:31.5pt;position:absolute;height:38.25pt"}, [
949+
xml_element("v:imagedata", {
950+
"r:id": self.IMAGE_RELATIONSHIP_ID,
951+
"o:title": "It's a hat"
952+
})
953+
])
954+
955+
image = self._read_embedded_image(shape_element)
956+
957+
assert_equal(documents.Image, type(image))
958+
assert_equal(documents.Size(width="31.5pt", height="38.25pt"), image.size)
959+
960+
@istest
961+
def can_read_shape_elements_with_inch_size_attributes(self):
962+
shape_element = xml_element("v:shape", {"style": "width:0.58in;height:0.708in"}, [
963+
xml_element("v:imagedata", {
964+
"r:id": self.IMAGE_RELATIONSHIP_ID,
965+
"o:title": "It's a hat"
966+
})
967+
])
968+
969+
image = self._read_embedded_image(shape_element)
970+
971+
assert_equal(documents.Image, type(image))
972+
assert_equal(documents.Size(width="0.58in", height="0.708in"), image.size)
973+
921974
@istest
922975
def when_imagedata_element_has_no_relationship_id_then_it_is_ignored_with_warning(self):
923976
imagedata_element = xml_element("v:imagedata")

0 commit comments

Comments
 (0)