Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
ae79d15
added the files
carrycooldude Mar 30, 2025
737f03a
Restructure LayoutLMv3 implementation to match KerasHub style
carrycooldude Apr 25, 2025
455a140
Refactor: Move LayoutLMv3 files to models directory and make code bac…
carrycooldude Apr 27, 2025
d92c8c4
refactor: Move LayoutLMv3 files to dedicated directory
carrycooldude Apr 27, 2025
0948f95
fix: Update LayoutLMv3 init files to follow correct format
carrycooldude Apr 30, 2025
3c02f78
fix: Update LayoutLMv3 backbone to follow project standards
carrycooldude Apr 30, 2025
4a79d9b
refactor: remove unnecessary files and fix imports in LayoutLMv3 module
carrycooldude May 26, 2025
c2fed4c
Add minimal stub for LayoutLMv3TransformerLayer
carrycooldude May 29, 2025
e828047
fix: resolve merge conflicts and complete rebase
carrycooldude May 30, 2025
063054d
refactor(layoutlmv3): move usage examples to class docstrings and rem…
carrycooldude Jul 4, 2025
476c0fd
style: apply code formatting and lint fixes via pre-commit
carrycooldude Jul 4, 2025
4439fad
made some changes
carrycooldude Jul 7, 2025
ad3c758
resolve the conflict issue
carrycooldude Jul 7, 2025
885f2fe
chore: update API directory and fix ruff line length in checkpoint co…
carrycooldude Jul 7, 2025
5019abb
update models
carrycooldude Jul 7, 2025
e1fc266
made changes
carrycooldude Jul 7, 2025
a32555c
chore: trigger CI
carrycooldude Jul 7, 2025
a885afa
Update API files
carrycooldude Jul 7, 2025
ad004f7
changed
carrycooldude Jul 7, 2025
6fb0fdc
chore: pre-commit fixes for layoutlmv3 __init__.py
carrycooldude Jul 7, 2025
5aaadab
chore: commit api directory after pre-commit run
carrycooldude Jul 8, 2025
8c7e989
update models
carrycooldude Jul 8, 2025
5a371a5
update layoutlmv3
carrycooldude Jul 9, 2025
bcad8d7
Fix all LayoutLMv3 issues from PR review
carrycooldude Jul 22, 2025
ca96183
Final formatting fixes for CI/CD
carrycooldude Jul 22, 2025
9c90753
Fix final ruff formatting issues
carrycooldude Jul 22, 2025
cf4b20b
Fix PyTorch backend compatibility issues - Separate ops.arange and o…
carrycooldude Jul 22, 2025
193496a
Fix PyTorch compatibility and test implementation
carrycooldude Jul 22, 2025
4d8604e
Simplify tests and fix imports to isolate PyTorch backend issue
carrycooldude Jul 22, 2025
e07224c
Fix PyTorch backend compatibility issues
carrycooldude Jul 22, 2025
6187459
Auto-fix ruff formatting issues
carrycooldude Jul 22, 2025
00fc976
Simplify LayoutLMv3 to use standard KerasHub patterns
carrycooldude Jul 22, 2025
0d3099d
Trigger fresh push - LayoutLMv3 implementation complete
carrycooldude Jul 22, 2025
82b9b93
🔧 Enhance backend compatibility and error handling
carrycooldude Jul 22, 2025
e40a6a0
Add comprehensive import error handling and fallbacks
carrycooldude Jul 22, 2025
7796cbf
Fix all code formatting issues
carrycooldude Jul 22, 2025
ae239c7
Add LayoutLMv3 exports to public API
carrycooldude Jul 22, 2025
6671da2
Revert " Add LayoutLMv3 exports to public API"
carrycooldude Jul 22, 2025
f1ac61a
Fix CI issues: bash syntax, formatting, and API generation
carrycooldude Jul 24, 2025
c83c124
Remove manual API imports - let auto-generation handle it
carrycooldude Jul 24, 2025
2ff3157
Restructure LayoutLMv3 backbone following KerasHub patterns - Follow …
carrycooldude Jul 24, 2025
87359e5
Apply comprehensive LayoutLMv3 fixes from commit bcad8d7e
carrycooldude Jul 24, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions keras_hub/src/models/layoutlmv3/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
"""LayoutLMv3 model."""

from keras_hub.src.models.layoutlmv3.layoutlmv3_backbone import LayoutLMv3Backbone
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This file is mainly for registering presets; please follow the other models to see the format we use.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pending

from keras_hub.src.models.layoutlmv3.layoutlmv3_tokenizer import LayoutLMv3Tokenizer
from keras_hub.src.models.layoutlmv3.document_classifier import LayoutLMv3DocumentClassifier
from keras_hub.src.models.layoutlmv3.document_classifier import LayoutLMv3DocumentClassifierPreprocessor
from keras_hub.src.models.layoutlmv3.layoutlmv3_presets import backbone_presets
from keras_hub.src.utils.preset_utils import register_presets

# Associate the imported preset table with the backbone class via the shared
# helper. NOTE(review): presumably this is what lets
# `LayoutLMv3Backbone.from_preset(...)` resolve preset names -- confirm
# against `keras_hub.src.utils.preset_utils`.
register_presets(backbone_presets, LayoutLMv3Backbone)
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
"""LayoutLMv3 document classifier."""

from keras_hub.src.models.layoutlmv3.document_classifier.layoutlmv3_document_classifier import LayoutLMv3DocumentClassifier
from keras_hub.src.models.layoutlmv3.document_classifier.layoutlmv3_document_classifier_preprocessor import LayoutLMv3DocumentClassifierPreprocessor
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
"""LayoutLMv3 document classifier task model."""

import tensorflow as tf
from tensorflow import keras

from keras_hub.src.models.layoutlmv3.layoutlmv3_backbone import LayoutLMv3Backbone


@keras.saving.register_keras_serializable(package="keras_hub")
class LayoutLMv3DocumentClassifier(keras.Model):
    """LayoutLMv3 document classifier task model.

    A functional model that feeds text, layout (bounding boxes) and image
    inputs through a backbone, then applies dropout and a dense
    classification layer to the backbone's `"pooled_output"`.

    Args:
        backbone: A LayoutLMv3Backbone instance. It is called with a dict
            holding the keys `"input_ids"`, `"bbox"`, `"attention_mask"`
            and `"image"`, and must return a dict containing
            `"pooled_output"`.
        num_classes: int. Number of classes to classify documents into.
        dropout: float. Dropout probability for the classification head.
        activation: str or callable. The activation function to use on the
            classification head.
        **kwargs: Additional keyword arguments forwarded to `keras.Model`.
    """

    def __init__(
        self,
        backbone,
        num_classes,
        dropout=0.1,
        activation="softmax",
        **kwargs,
    ):
        # Symbolic inputs; sequence length and image size are left dynamic.
        inputs = {
            "input_ids": keras.Input(shape=(None,), dtype=tf.int32),
            "bbox": keras.Input(shape=(None, 4), dtype=tf.int32),
            "attention_mask": keras.Input(shape=(None,), dtype=tf.int32),
            "image": keras.Input(shape=(None, None, 3), dtype=tf.float32),
        }

        # Only the pooled representation feeds the classification head.
        pooled_output = backbone(inputs)["pooled_output"]

        # Classification head.
        x = keras.layers.Dropout(dropout)(pooled_output)
        outputs = keras.layers.Dense(
            num_classes,
            activation=activation,
            name="classifier",
        )(x)

        super().__init__(
            inputs=inputs,
            outputs=outputs,
            **kwargs,
        )

        # Kept on the instance so `get_config` can report them.
        self.backbone = backbone
        self.num_classes = num_classes
        self.dropout = dropout
        self.activation = activation

    def get_config(self):
        """Return the constructor arguments, with the backbone serialized."""
        config = super().get_config()
        config.update(
            {
                "backbone": keras.saving.serialize_keras_object(
                    self.backbone
                ),
                "num_classes": self.num_classes,
                "dropout": self.dropout,
                "activation": self.activation,
            }
        )
        return config

    @classmethod
    def from_config(cls, config, custom_objects=None):
        """Rebuild the classifier from the output of `get_config`.

        `get_config` stores the backbone in serialized (dict) form, so it
        has to be turned back into a layer before the constructor can call
        it.

        Args:
            config: dict. A configuration produced by `get_config`.
            custom_objects: Optional dict of extra objects to make
                available while deserializing the backbone.

        Returns:
            A LayoutLMv3DocumentClassifier instance.
        """
        # Copy so the caller's dict is not modified.
        config = dict(config)
        if isinstance(config.get("backbone"), dict):
            config["backbone"] = keras.saving.deserialize_keras_object(
                config["backbone"], custom_objects=custom_objects
            )
        return cls(**config)

    @classmethod
    def from_preset(
        cls,
        preset,
        num_classes,
        dropout=0.1,
        activation="softmax",
        **kwargs,
    ):
        """Create a LayoutLMv3 document classifier from a preset.

        The backbone is loaded with `LayoutLMv3Backbone.from_preset` and
        wrapped with a freshly initialized classification head.

        Args:
            preset: string. Must be one of "layoutlmv3_base",
                "layoutlmv3_large".
            num_classes: int. Number of classes to classify documents into.
            dropout: float. Dropout probability for the classification head.
            activation: str or callable. The activation function to use on
                the classification head.
            **kwargs: Additional keyword arguments passed to the
                constructor.

        Returns:
            A LayoutLMv3DocumentClassifier instance.
        """
        backbone = LayoutLMv3Backbone.from_preset(preset)
        return cls(
            backbone=backbone,
            num_classes=num_classes,
            dropout=dropout,
            activation=activation,
            **kwargs,
        )
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
"""LayoutLMv3 document classifier preprocessor.

This preprocessor inherits from Preprocessor and adds LayoutLMv3-specific
functionality for document classification.

Example:
```python
# Initialize the preprocessor
preprocessor = LayoutLMv3DocumentClassifierPreprocessor(
tokenizer=LayoutLMv3Tokenizer.from_preset("layoutlmv3_base"),
sequence_length=512,
image_size=(112, 112),
)

# Preprocess input
features = {
"text": ["Invoice #12345\nTotal: $100.00", "Receipt #67890\nTotal: $50.00"],
"bbox": [
[[0, 0, 100, 20], [0, 30, 100, 50]], # Bounding boxes for first document
[[0, 0, 100, 20], [0, 30, 100, 50]], # Bounding boxes for second document
],
"image": tf.random.uniform((2, 112, 112, 3)), # Random images for demo
}
preprocessed = preprocessor(features)
```
"""

import os
import json
import tensorflow as tf
from keras.saving import register_keras_serializable
from keras.utils import register_keras_serializable
from keras_hub.src.models.preprocessor import Preprocessor
from .layoutlmv3_tokenizer import LayoutLMv3Tokenizer

import keras
from keras import layers
from keras.src.saving import register_keras_serializable

from keras_hub.src.api_export import keras_hub_export
from keras_hub.src.models.layoutlmv3.layoutlmv3_backbone import LayoutLMv3Backbone
from keras_hub.src.utils.tensor_utils import preprocessing_function


@keras_hub_export(
    [
        "keras_hub.models.LayoutLMv3DocumentClassifierPreprocessor",
        "keras_hub.models.LayoutLMv3Preprocessor",
    ]
)
# NOTE(review): registered without `package=`, unlike the classifier, which
# uses package="keras_hub". Left unchanged because it alters the serialized
# name -- confirm which is intended.
@register_keras_serializable()
class LayoutLMv3DocumentClassifierPreprocessor(Preprocessor):
    """LayoutLMv3 document classifier preprocessor.

    Tokenizes text, densifies and truncates bounding boxes, and casts and
    resizes images, producing the dict of inputs used for document
    classification.

    Args:
        tokenizer: A LayoutLMv3Tokenizer instance.
        sequence_length: The maximum sequence length to use.
        image_size: A tuple of (height, width) for resizing images.
        **kwargs: Additional keyword arguments forwarded to the base class.
    """

    def __init__(
        self,
        tokenizer,
        sequence_length=512,
        image_size=(112, 112),
        **kwargs,
    ):
        # Only generic layer kwargs go to the base class. The settings below
        # are read by `call` and `get_config`, so they must be stored on the
        # instance rather than passed up as constructor keywords.
        super().__init__(**kwargs)
        self.tokenizer = tokenizer
        self.sequence_length = sequence_length
        self.image_size = image_size

    def call(self, x, y=None, sample_weight=None):
        """Process the inputs.

        Args:
            x: A dictionary containing:
                - "text": A string or list of strings to tokenize.
                - "image": An image batch, or a list of images that can be
                  stacked; it is cast to float32 and resized to
                  `image_size`.
                - "bbox": A list of bounding boxes for each token in the
                  text, or an already built tensor.
            y: Any label data. Will be passed through unaltered.
            sample_weight: Any label weight data. Will be passed through
                unaltered.

        Returns:
            A tuple of (processed_inputs, y, sample_weight), where
            `processed_inputs` has the keys "input_ids", "bbox",
            "attention_mask" and "image".
        """
        # Tokenize the text.
        tokenized = self.tokenizer(x["text"])
        input_ids = tokenized["token_ids"]
        attention_mask = tokenized["attention_mask"]

        # Bounding boxes: nested lists become ragged, and ragged input is
        # densified to `sequence_length` boxes of 4 values. Dense tensors
        # pass through untouched, since they have no `to_tensor` method.
        bbox = x["bbox"]
        if isinstance(bbox, list):
            bbox = tf.ragged.constant(bbox)
        if isinstance(bbox, tf.RaggedTensor):
            bbox = bbox.to_tensor(shape=(None, self.sequence_length, 4))

        # Images: stack a list into one batch, cast, then apply the
        # configured target size.
        image = x["image"]
        if isinstance(image, list):
            image = tf.stack(image)
        image = tf.cast(image, tf.float32)
        if self.image_size is not None:
            image = tf.image.resize(image, self.image_size)

        # Truncate every sequence input to `sequence_length` (no padding is
        # added here).
        input_ids = input_ids[:, : self.sequence_length]
        attention_mask = attention_mask[:, : self.sequence_length]
        bbox = bbox[:, : self.sequence_length]

        processed_inputs = {
            "input_ids": input_ids,
            "bbox": bbox,
            "attention_mask": attention_mask,
            "image": image,
        }

        return processed_inputs, y, sample_weight

    def get_config(self):
        """Return the constructor arguments, with the tokenizer serialized."""
        config = super().get_config()
        config.update(
            {
                "tokenizer": keras.saving.serialize_keras_object(
                    self.tokenizer
                ),
                "sequence_length": self.sequence_length,
                "image_size": self.image_size,
            }
        )
        return config

    @classmethod
    def from_config(cls, config):
        """Rebuild the preprocessor from the output of `get_config`."""
        # Copy so the caller's dict is not modified.
        config = dict(config)
        if "tokenizer" in config:
            config["tokenizer"] = keras.saving.deserialize_keras_object(
                config["tokenizer"]
            )
        return cls(**config)

    @classmethod
    def from_preset(
        cls,
        preset,
        **kwargs,
    ):
        """Instantiate LayoutLMv3DocumentClassifierPreprocessor from preset.

        Args:
            preset: string. Must be one of "layoutlmv3_base",
                "layoutlmv3_large".
            **kwargs: Additional keyword arguments passed to the
                constructor.

        Returns:
            A LayoutLMv3DocumentClassifierPreprocessor instance.

        Raises:
            ValueError: If `preset` is not a key of `cls.presets`.

        Examples:
        ```python
        # Load preprocessor from preset
        preprocessor = LayoutLMv3DocumentClassifierPreprocessor.from_preset(
            "layoutlmv3_base"
        )
        ```
        """
        if preset not in cls.presets:
            known_presets = ", ".join(cls.presets)
            raise ValueError(
                f"`preset` must be one of {known_presets}. "
                f"Received: {preset}"
            )

        # NOTE(review): assumes each preset entry has a "config" dict with
        # "sequence_length" and "image_size" -- confirm against the preset
        # definitions.
        config = cls.presets[preset]["config"]

        tokenizer = LayoutLMv3Tokenizer.from_preset(preset)

        return cls(
            tokenizer=tokenizer,
            sequence_length=config["sequence_length"],
            image_size=config["image_size"],
            **kwargs,
        )
Loading