6 changes: 4 additions & 2 deletions README.md
@@ -46,8 +46,8 @@ Now, we're excited to introduce ***DetectionMetrics v2***! While retaining the f
<tr>
<td>Object detection</td>
<td>Image</td>
- <td>COCO, custom formats</td>
- <td>PyTorch</td>
+ <td>COCO, YOLO</td>
+ <td>PyTorch (tested with torchvision and TorchScript-exported YOLO models)</td>
</tr>
</tbody>
</table>
@@ -118,6 +118,8 @@ For detailed GUI documentation, see our [GUI guide](https://jderobot.github.io/D

🧑‍🏫️ [Image Detection Tutorial](https://github.com/JdeRobot/DetectionMetrics/blob/master/examples/tutorial_image_detection.ipynb)

+ 🧑‍🏫️ [Image Detection Tutorial (YOLO)](https://github.com/JdeRobot/DetectionMetrics/blob/master/examples/tutorial_image_detection_yolo.ipynb)

You can check the `examples` directory for further inspiration. If you are using *poetry*, you can run the provided scripts either by activating the created environment with `poetry shell` or by running `poetry run python examples/<some_python_script.py>` directly.

## Command-line interface
46 changes: 38 additions & 8 deletions app.py
@@ -79,7 +79,7 @@ def browse_folder():

# Initialize commonly used session state keys
st.session_state.setdefault("dataset_path", "")
st.session_state.setdefault("dataset_type_selectbox", "Coco")
st.session_state.setdefault("dataset_type_selectbox", "COCO")
st.session_state.setdefault("split_selectbox", "val")
st.session_state.setdefault("config_option", "Manual Configuration")
st.session_state.setdefault("confidence_threshold", 0.5)
@@ -97,23 +97,23 @@ def browse_folder():
# First row: Type and Split
col1, col2 = st.columns(2)
with col1:
- st.selectbox(
+ dataset_type_selectbox = st.selectbox(
"Type",
["Coco", "Custom"],
["COCO", "YOLO"],
key="dataset_type_selectbox",
)
with col2:
st.selectbox(
"Split",
["train", "val"],
["train", "val", "test"],
key="split_selectbox",
)

# Second row: Path and Browse button
col1, col2 = st.columns([3, 1])
with col1:
dataset_path_input = st.text_input(
"Dataset Folder Path",
"Dataset Folder",
value=st.session_state.get("dataset_path", ""),
key="dataset_path_input",
)
@@ -129,15 +129,32 @@ def browse_folder():
elif folder is not None:
st.warning("Selected path is not a valid folder.")
else:
st.warning("Could not open folder browser. Please enter the path manually")
st.warning(
"Could not open folder browser. Please enter the path manually"
)

if dataset_path_input != st.session_state.get("dataset_path", ""):
st.session_state["dataset_path"] = dataset_path_input
+ if dataset_type_selectbox != st.session_state.get("dataset_type", ""):
+ st.session_state["dataset_type"] = dataset_type_selectbox

+ # Additional input for YOLO config file
+ if dataset_type_selectbox == "YOLO":
+ dataset_config_file_uploader = st.file_uploader(
+ "Dataset Configuration (.yaml)",
+ type=["yaml"],
+ key="dataset_config_file",
+ help="Upload a YAML dataset configuration file.",
+ )
+ if dataset_config_file_uploader != st.session_state.get(
+ "dataset_config_file", None
+ ):
+ st.session_state["dataset_config_file"] = dataset_config_file_uploader

with st.expander("Model Inputs", expanded=False):
st.file_uploader(
"Model File (.pt, .onnx, .h5, .pb, .pth)",
type=["pt", "onnx", "h5", "pb", "pth"],
"Model File (.pt, .onnx, .h5, .pb, .pth, .torchscript)",
type=["pt", "onnx", "h5", "pb", "pth", "torchscript"],
key="model_file",
help="Upload your trained model file.",
)
@@ -199,6 +216,17 @@ def browse_folder():
index=0 if st.session_state.get("device", "cpu") == "cpu" else 1,
key="device",
)
+ st.selectbox(
+ "Model Format",
+ ["torchvision", "YOLO"],
+ index=(
+ 0
+ if st.session_state.get("model_format", "torchvision")
+ == "torchvision"
+ else 1
+ ),
+ key="model_format",
+ )
st.number_input(
"Batch Size",
min_value=1,
@@ -264,13 +292,15 @@ def browse_folder():
device = st.session_state.get("device", "cpu")
batch_size = int(st.session_state.get("batch_size", 1))
evaluation_step = int(st.session_state.get("evaluation_step", 5))
+ model_format = st.session_state.get("model_format", "torchvision")
config_data = {
"confidence_threshold": confidence_threshold,
"nms_threshold": nms_threshold,
"max_detections_per_image": max_detections,
"device": device,
"batch_size": batch_size,
"evaluation_step": evaluation_step,
"model_format": model_format.lower(),
}
with tempfile.NamedTemporaryFile(
delete=False, suffix=".json", mode="w"
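
The block above serializes the GUI selections into a temporary JSON file that is presumably handed to the evaluation backend. A sketch of the equivalent write, using the defaults visible in this diff where shown (the `nms_threshold` and `max_detections_per_image` defaults do not appear in these hunks, so those values are illustrative):

```python
import json
import tempfile

# Mirrors config_data above; "model_format" is stored lowercased
config_data = {
    "confidence_threshold": 0.5,      # session-state default shown above
    "nms_threshold": 0.5,             # illustrative value
    "max_detections_per_image": 100,  # illustrative value
    "device": "cpu",
    "batch_size": 1,
    "evaluation_step": 5,
    "model_format": "torchvision",    # or "yolo" after .lower()
}

# Write to a temp file whose path can be passed to the evaluator
with tempfile.NamedTemporaryFile(delete=False, suffix=".json", mode="w") as f:
    json.dump(config_data, f)
    config_path = f.name
```
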
11 changes: 5 additions & 6 deletions detectionmetrics/datasets/coco.py
@@ -92,14 +92,14 @@ def __init__(self, annotation_file: str, image_dir: str, split: str = "train"):
def read_annotation(
self, fname: str
) -> Tuple[List[List[float]], List[int]]:
"""Return bounding boxes, labels, and category_ids for a given image ID.
"""Return bounding boxes and category indices for a given image ID.

This method uses COCO's efficient indexing to load annotations on-demand.
The COCO object maintains an internal index that allows for very fast
annotation retrieval without needing a separate cache.

:param fname: str (image_id in string form)
- :return: Tuple of (boxes, labels, category_ids)
+ :return: Tuple of (boxes, category_indices)
"""
# Extract image ID (fname might be a path or ID string)
try:
@@ -112,11 +112,10 @@ def read_annotation(
ann_ids = self.coco.getAnnIds(imgIds=image_id)
anns = self.coco.loadAnns(ann_ids)

- boxes, labels, category_ids = [], [], []
+ boxes, category_indices = [], []
for ann in anns:
x, y, w, h = ann["bbox"]
boxes.append([x, y, x + w, y + h])
- labels.append(ann["category_id"])
- category_ids.append(ann["category_id"])
+ category_indices.append(ann["category_id"])

- return boxes, labels, category_ids
+ return boxes, category_indices
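
The substantive change in `read_annotation` is dropping the duplicated `labels` list; the box convention is unchanged: COCO stores `[x, y, width, height]` and the method returns corner-format `[x1, y1, x2, y2]`. A self-contained sketch of that conversion with a worked example:

```python
from typing import Dict, List, Tuple


def coco_anns_to_xyxy(anns: List[Dict]) -> Tuple[List[List[float]], List[int]]:
    """Convert COCO annotation dicts to (boxes, category_indices)."""
    boxes, category_indices = [], []
    for ann in anns:
        x, y, w, h = ann["bbox"]            # top-left corner plus size
        boxes.append([x, y, x + w, y + h])  # -> corner format
        category_indices.append(ann["category_id"])
    return boxes, category_indices


# A 40x30 box anchored at (10, 20):
print(coco_anns_to_xyxy([{"bbox": [10, 20, 40, 30], "category_id": 3}]))
# ([[10, 20, 50, 50]], [3])
```
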
135 changes: 135 additions & 0 deletions detectionmetrics/datasets/yolo.py
@@ -0,0 +1,135 @@
from glob import glob
import os
from typing import Tuple, List, Optional

import pandas as pd
from PIL import Image

from detectionmetrics.datasets.detection import ImageDetectionDataset
from detectionmetrics.utils import io as uio


def build_dataset(
dataset_fname: str, dataset_dir: Optional[str] = None, im_ext: str = "jpg"
) -> Tuple[pd.DataFrame, dict, str]:
"""Build dataset and ontology dictionaries from YOLO dataset structure

:param dataset_fname: Path to the YAML dataset configuration file
:type dataset_fname: str
:param dataset_dir: Path to the directory containing images and annotations. If not provided, it will be inferred from the dataset file
:type dataset_dir: Optional[str]
:param im_ext: Image file extension (default is "jpg")
:type im_ext: str
:return: Dataset DataFrame, ontology dictionary, and resolved dataset directory
:rtype: Tuple[pd.DataFrame, dict, str]
"""
# Read dataset configuration from YAML file
assert os.path.isfile(dataset_fname), f"Dataset file not found: {dataset_fname}"
dataset_info = uio.read_yaml(dataset_fname)

# Check that image directory exists
if dataset_dir is None:
dataset_dir = dataset_info["path"]
assert os.path.isdir(dataset_dir), f"Dataset directory not found: {dataset_dir}"

# Build ontology from dataset configuration
ontology = {}
for idx, name in dataset_info["names"].items():
ontology[name] = {
"idx": idx,
"rgb": [0, 0, 0], # Placeholder; YAML doesn't define RGB colors
}

# Build dataset DataFrame
rows = []
for split in ["train", "val", "test"]:
if split in dataset_info:
images_dir = os.path.join(dataset_dir, dataset_info[split])
labels_dir = os.path.join(
dataset_dir, dataset_info[split].replace("images", "labels")
)
for label_fname in glob(os.path.join(labels_dir, "*.txt")):
label_basename = os.path.basename(label_fname)
image_basename = label_basename.replace(".txt", f".{im_ext}")
image_fname = os.path.join(images_dir, image_basename)
if not os.path.isfile(image_fname):
continue

rows.append(
{
"image": os.path.join("images", split, image_basename),
"annotation": os.path.join("labels", split, label_basename),
"split": split,
}
)

dataset = pd.DataFrame(rows)
dataset.attrs = {"ontology": ontology}

return dataset, ontology, dataset_dir
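
For reference, `build_dataset` assumes the YAML follows the common Ultralytics layout. A hypothetical minimal configuration, shown here as the dict `uio.read_yaml` would return:

```python
# Equivalent YAML:
#   path: datasets/my_dataset
#   train: images/train
#   val: images/val
#   names:
#     0: cat
#     1: dog
dataset_info = {
    "path": "datasets/my_dataset",  # used when dataset_dir is not given
    "train": "images/train",        # per-split image dirs, relative to the root
    "val": "images/val",
    "names": {0: "cat", 1: "dog"},  # index -> class name, drives the ontology
}
```
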


class YOLODataset(ImageDetectionDataset):
"""
Specific class for YOLO-styled object detection datasets.

:param dataset_fname: Path to the YAML dataset configuration file
:type dataset_fname: str
:param dataset_dir: Path to the directory containing images and annotations. If not provided, it will be inferred from the dataset file
:type dataset_dir: Optional[str]
:param im_ext: Image file extension (default is "jpg")
:type im_ext: str
"""

def __init__(
self, dataset_fname: str, dataset_dir: Optional[str] = None, im_ext: str = "jpg"
):
# Build dataset and ontology from the YAML dataset configuration
dataset, ontology, dataset_dir = build_dataset(
dataset_fname, dataset_dir, im_ext
)

self.im_ext = im_ext
super().__init__(dataset=dataset, dataset_dir=dataset_dir, ontology=ontology)

def read_annotation(
self, fname: str, image_size: Optional[Tuple[int, int]] = None
) -> Tuple[List[List[float]], List[int]]:
"""Return bounding boxes, and category indices for a given image ID.

:param fname: Annotation path
:type fname: str
:param image_size: Corresponding image size in (w, h) format, used to convert relative bounding boxes to absolute coordinates. If not provided, the size is read from the corresponding image file
:type image_size: Optional[Tuple[int, int]]
:return: Tuple of (boxes, category_indices)
"""
label = uio.read_txt(fname)
image_fname = fname.replace(".txt", f".{self.im_ext}")
image_fname = image_fname.replace("labels", "images")
if image_size is None:
image_size = Image.open(image_fname).size

boxes = []
category_indices = []

im_w, im_h = image_size
for row in label:
category_idx, xc, yc, w, h = map(float, row.split())
category_indices.append(int(category_idx))

abs_xc = xc * im_w
abs_yc = yc * im_h
abs_w = w * im_w
abs_h = h * im_h

boxes.append(
[
abs_xc - abs_w / 2,
abs_yc - abs_h / 2,
abs_xc + abs_w / 2,
abs_yc + abs_h / 2,
]
)

return boxes, category_indices
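
Putting the new class together: a minimal usage sketch under a hypothetical dataset layout (`images/{split}` and `labels/{split}` under one root, plus an Ultralytics-style YAML):

```python
from detectionmetrics.datasets.yolo import YOLODataset

# Hypothetical paths; dataset_dir may be None, in which case it is taken
# from the "path" key of the YAML configuration
dataset = YOLODataset(
    dataset_fname="datasets/my_dataset/data.yaml",
    dataset_dir="datasets/my_dataset",
    im_ext="jpg",
)

# Parse one label file into absolute [x1, y1, x2, y2] boxes and class indices;
# e.g. the line "0 0.5 0.5 0.5 0.5" in a 640x480 image yields
# category 0 with box [160.0, 120.0, 480.0, 360.0]
boxes, category_indices = dataset.read_annotation(
    "datasets/my_dataset/labels/val/000001.txt"
)
```
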