Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions docling_core/types/doc/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,9 @@ def _get_text(self, doc: Optional["DoclingDocument"] = None, **kwargs: Any) -> s
from docling_core.transforms.serializer.markdown import MarkdownDocSerializer

if doc is not None:
doc_serializer = MarkdownDocSerializer(doc=doc)
doc_serializer = kwargs.pop(
"doc_serializer", MarkdownDocSerializer(doc=doc)
)
ser_res = doc_serializer.serialize(item=self.ref.resolve(doc=doc), **kwargs)
return ser_res.text
else:
Expand Down Expand Up @@ -1692,6 +1694,9 @@ def export_to_otsl(
# Headers (column, row, section row):
# "ched", "rhed", "srow"

from docling_core.transforms.serializer.doctags import DocTagsDocSerializer

doc_serializer = DocTagsDocSerializer(doc=doc)
body = []
nrows = self.data.num_rows
ncols = self.data.num_cols
Expand All @@ -1705,7 +1710,9 @@ def export_to_otsl(
for i in range(nrows):
for j in range(ncols):
cell: TableCell = self.data.grid[i][j]
content = cell._get_text(doc=doc, **kwargs).strip()
content = cell._get_text(
doc=doc, doc_serializer=doc_serializer, **kwargs
).strip()
rowspan, rowstart = (
cell.row_span,
cell.start_row_offset_idx,
Expand Down
15 changes: 9 additions & 6 deletions examples/rich_table_cells.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,9 @@
"output_type": "stream",
"text": [
"<doctag><title>Rich tables</title>\n",
"<otsl><fcel>cell 0,0<fcel>cell 0,1<nl><fcel>cell 1,0<fcel>- list item 1\n",
"- list item 2<nl><fcel>cell 2,0<fcel>cell 2,1<nl></otsl>\n",
"<otsl><fcel>cell 0,0<fcel>cell 0,1<nl><fcel>cell 1,0<fcel><unordered_list><list_item>list item 1</list_item>\n",
"<list_item>list item 2</list_item>\n",
"</unordered_list><nl><fcel>cell 2,0<fcel>cell 2,1<nl></otsl>\n",
"</doctag>\n"
]
}
Expand Down Expand Up @@ -218,8 +219,9 @@
"name": "stdout",
"output_type": "stream",
"text": [
"<otsl><fcel>cell 0,0<fcel>cell 0,1<nl><fcel>cell 1,0<fcel>- list item 1\n",
"- list item 2<nl><fcel>cell 2,0<fcel>cell 2,1<nl></otsl>\n"
"<otsl><fcel>cell 0,0<fcel>cell 0,1<nl><fcel>cell 1,0<fcel><unordered_list><list_item>list item 1</list_item>\n",
"<list_item>list item 2</list_item>\n",
"</unordered_list><nl><fcel>cell 2,0<fcel>cell 2,1<nl></otsl>\n"
]
}
],
Expand All @@ -237,8 +239,9 @@
"name": "stdout",
"output_type": "stream",
"text": [
"<fcel>cell 0,0<fcel>cell 0,1<nl><fcel>cell 1,0<fcel>- list item 1\n",
"- list item 2<nl><fcel>cell 2,0<fcel>cell 2,1<nl>\n"
"<fcel>cell 0,0<fcel>cell 0,1<nl><fcel>cell 1,0<fcel><unordered_list><list_item>list item 1</list_item>\n",
"<list_item>list item 2</list_item>\n",
"</unordered_list><nl><fcel>cell 2,0<fcel>cell 2,1<nl>\n"
]
}
],
Expand Down
7 changes: 3 additions & 4 deletions test/data/doc/rich_table.out.dt
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
<doctag><title>Rich tables</title>
<otsl><fcel>cell 0,0<fcel>cell 0,1<nl><fcel>cell 1,0<fcel>*text in italic*<nl><fcel>- list item 1
- list item 2<fcel>cell 2,1<nl><fcel>cell 3,0<fcel>| inner cell 0,0 | inner cell 0,1 | inner cell 0,2 |
|------------------|------------------|------------------|
| inner cell 1,0 | inner cell 1,1 | inner cell 1,2 |<nl></otsl>
<otsl><fcel>cell 0,0<fcel>cell 0,1<nl><fcel>cell 1,0<fcel><text>text in italic</text><nl><fcel><unordered_list><list_item>list item 1</list_item>
<list_item>list item 2</list_item>
</unordered_list><fcel>cell 2,1<nl><fcel>cell 3,0<fcel><otsl><fcel>inner cell 0,0<fcel>inner cell 0,1<fcel>inner cell 0,2<nl><fcel>inner cell 1,0<fcel>inner cell 1,1<fcel>inner cell 1,2<nl></otsl><nl></otsl>
</doctag>
Loading