{
  "access": {
    "embargo": {
      "active": false,
      "reason": null
    },
    "files": "public",
    "record": "public",
    "status": "open"
  },
  "created": "2026-05-08T21:51:30.059125+00:00",
  "custom_fields": {},
  "deletion_status": {
    "is_deleted": false,
    "status": "P"
  },
  "files": {
    "count": 7,
    "default_preview": "preview_figure.png",
    "enabled": true,
    "entries": {
      "DETECT_LEAVES.zip": {
        "access": {
          "hidden": false
        },
        "checksum": "md5:1fbfa0d4554ff494384d680017b7488f",
        "ext": "zip",
        "id": "0ddc1084-f282-4e3e-915c-f65b86b2cf40",
        "key": "DETECT_LEAVES.zip",
        "links": {
          "content": "https://zenodo.org/api/records/20090333/files/DETECT_LEAVES.zip/content",
          "self": "https://zenodo.org/api/records/20090333/files/DETECT_LEAVES.zip"
        },
        "metadata": {},
        "mimetype": "application/zip",
        "size": 9559749617,
        "storage_class": "L"
      },
      "LEAF_DATA_METADATA.csv": {
        "access": {
          "hidden": false
        },
        "checksum": "md5:ccef9350792840d2c87c4f3b20b8c577",
        "ext": "csv",
        "id": "83337739-503c-4cf1-9e95-b3f859b4f791",
        "key": "LEAF_DATA_METADATA.csv",
        "links": {
          "content": "https://zenodo.org/api/records/20090333/files/LEAF_DATA_METADATA.csv/content",
          "self": "https://zenodo.org/api/records/20090333/files/LEAF_DATA_METADATA.csv"
        },
        "metadata": {},
        "mimetype": "text/csv",
        "size": 2228272,
        "storage_class": "L"
      },
      "LEAF_DETECTION_SUMMARY.csv": {
        "access": {
          "hidden": false
        },
        "checksum": "md5:1908419c1ed31db610debf0156aefe3c",
        "ext": "csv",
        "id": "bdf05af1-a285-4970-a8e5-c9c1815ebdb5",
        "key": "LEAF_DETECTION_SUMMARY.csv",
        "links": {
          "content": "https://zenodo.org/api/records/20090333/files/LEAF_DETECTION_SUMMARY.csv/content",
          "self": "https://zenodo.org/api/records/20090333/files/LEAF_DETECTION_SUMMARY.csv"
        },
        "metadata": {},
        "mimetype": "text/csv",
        "size": 290,
        "storage_class": "L"
      },
      "LEAF_MASK_METADATA.csv": {
        "access": {
          "hidden": false
        },
        "checksum": "md5:f52fa4b908bf6ddb1ebaed36ff8acdaf",
        "ext": "csv",
        "id": "da5bf850-4618-4af6-89f8-29ee8d082312",
        "key": "LEAF_MASK_METADATA.csv",
        "links": {
          "content": "https://zenodo.org/api/records/20090333/files/LEAF_MASK_METADATA.csv/content",
          "self": "https://zenodo.org/api/records/20090333/files/LEAF_MASK_METADATA.csv"
        },
        "metadata": {},
        "mimetype": "text/csv",
        "size": 6433657,
        "storage_class": "L"
      },
      "MASTER_LEAF_METADATA.csv": {
        "access": {
          "hidden": false
        },
        "checksum": "md5:eaffe8e3c7068b8cde98a2b671430cdd",
        "ext": "csv",
        "id": "df80a9c6-44c7-43ce-8f61-48b1e3b7c392",
        "key": "MASTER_LEAF_METADATA.csv",
        "links": {
          "content": "https://zenodo.org/api/records/20090333/files/MASTER_LEAF_METADATA.csv/content",
          "self": "https://zenodo.org/api/records/20090333/files/MASTER_LEAF_METADATA.csv"
        },
        "metadata": {},
        "mimetype": "text/csv",
        "size": 6281908,
        "storage_class": "L"
      },
      "REDUCED_LEAF_DATA.zip": {
        "access": {
          "hidden": false
        },
        "checksum": "md5:1d506b4d030f8f2657ab0e4b42889439",
        "ext": "zip",
        "id": "dbb00281-0626-4e5f-bffe-c9b428e1377f",
        "key": "REDUCED_LEAF_DATA.zip",
        "links": {
          "content": "https://zenodo.org/api/records/20090333/files/REDUCED_LEAF_DATA.zip/content",
          "self": "https://zenodo.org/api/records/20090333/files/REDUCED_LEAF_DATA.zip"
        },
        "metadata": {},
        "mimetype": "application/zip",
        "size": 1859782315,
        "storage_class": "L"
      },
      "preview_figure.png": {
        "access": {
          "hidden": false
        },
        "checksum": "md5:54fd66fb69f87cb71c47d0a325d252ec",
        "ext": "png",
        "id": "a813631a-39ac-4156-a595-e78a7469236a",
        "key": "preview_figure.png",
        "links": {
          "content": "https://zenodo.org/api/records/20090333/files/preview_figure.png/content",
          "iiif_api": "https://zenodo.org/api/iiif/record:20090333:preview_figure.png/full/full/0/default.png",
          "iiif_base": "https://zenodo.org/api/iiif/record:20090333:preview_figure.png",
          "iiif_canvas": "https://zenodo.org/api/iiif/record:20090333/canvas/preview_figure.png",
          "iiif_info": "https://zenodo.org/api/iiif/record:20090333:preview_figure.png/info.json",
          "self": "https://zenodo.org/api/records/20090333/files/preview_figure.png"
        },
        "metadata": {
          "height": 3029,
          "width": 2428
        },
        "mimetype": "image/png",
        "size": 5504701,
        "storage_class": "L"
      }
    },
    "order": [],
    "total_bytes": 11439980760
  },
  "id": "20090333",
  "is_draft": false,
  "is_published": true,
  "links": {
    "access": "https://zenodo.org/api/records/20090333/access",
    "access_grants": "https://zenodo.org/api/records/20090333/access/grants",
    "access_links": "https://zenodo.org/api/records/20090333/access/links",
    "access_request": "https://zenodo.org/api/records/20090333/access/request",
    "access_users": "https://zenodo.org/api/records/20090333/access/users",
    "archive": "https://zenodo.org/api/records/20090333/files-archive",
    "archive_media": "https://zenodo.org/api/records/20090333/media-files-archive",
    "communities": "https://zenodo.org/api/records/20090333/communities",
    "communities-suggestions": "https://zenodo.org/api/records/20090333/communities-suggestions",
    "doi": "https://doi.org/10.5281/zenodo.20090333",
    "draft": "https://zenodo.org/api/records/20090333/draft",
    "file_modification": "https://zenodo.org/api/records/20090333/file-modification",
    "files": "https://zenodo.org/api/records/20090333/files",
    "latest": "https://zenodo.org/api/records/20090333/versions/latest",
    "latest_html": "https://zenodo.org/records/20090333/latest",
    "media_files": "https://zenodo.org/api/records/20090333/media-files",
    "parent": "https://zenodo.org/api/records/20090332",
    "parent_doi": "https://doi.org/10.5281/zenodo.20090332",
    "parent_doi_html": "https://zenodo.org/doi/10.5281/zenodo.20090332",
    "parent_html": "https://zenodo.org/records/20090332",
    "preview_html": "https://zenodo.org/records/20090333?preview=1",
    "quota_increase": "https://zenodo.org/api/records/20090333/quota-increase",
    "request_deletion": "https://zenodo.org/api/records/20090333/request-deletion",
    "requests": "https://zenodo.org/api/records/20090333/requests",
    "reserve_doi": "https://zenodo.org/api/records/20090333/draft/pids/doi",
    "self": "https://zenodo.org/api/records/20090333",
    "self_doi": "https://doi.org/10.5281/zenodo.20090333",
    "self_doi_html": "https://zenodo.org/doi/10.5281/zenodo.20090333",
    "self_html": "https://zenodo.org/records/20090333",
    "self_iiif_manifest": "https://zenodo.org/api/iiif/record:20090333/manifest",
    "self_iiif_sequence": "https://zenodo.org/api/iiif/record:20090333/sequence/default",
    "thumbnails": {
      "10": "https://zenodo.org/api/iiif/record:20090333:preview_figure.png/full/%5E10,/0/default.jpg",
      "100": "https://zenodo.org/api/iiif/record:20090333:preview_figure.png/full/%5E100,/0/default.jpg",
      "1200": "https://zenodo.org/api/iiif/record:20090333:preview_figure.png/full/%5E1200,/0/default.jpg",
      "250": "https://zenodo.org/api/iiif/record:20090333:preview_figure.png/full/%5E250,/0/default.jpg",
      "50": "https://zenodo.org/api/iiif/record:20090333:preview_figure.png/full/%5E50,/0/default.jpg",
      "750": "https://zenodo.org/api/iiif/record:20090333:preview_figure.png/full/%5E750,/0/default.jpg"
    },
    "versions": "https://zenodo.org/api/records/20090333/versions"
  },
  "media_files": {
    "count": 1,
    "enabled": true,
    "entries": {
      "preview_figure.png.ptif": {
        "access": {
          "hidden": true
        },
        "ext": "ptif",
        "id": "e35101cb-86c2-46e0-8578-c1b7002354c1",
        "key": "preview_figure.png.ptif",
        "links": {
          "content": "https://zenodo.org/api/records/20090333/files/preview_figure.png.ptif/content",
          "self": "https://zenodo.org/api/records/20090333/files/preview_figure.png.ptif"
        },
        "metadata": null,
        "mimetype": "application/octet-stream",
        "processor": {
          "source_file_id": "a813631a-39ac-4156-a595-e78a7469236a",
          "status": "finished",
          "type": "image-tiles"
        },
        "size": 0,
        "storage_class": "L"
      }
    },
    "order": [],
    "total_bytes": 0
  },
  "metadata": {
    "creators": [
      {
        "affiliations": [
          {
            "id": "edmo:3572",
            "identifiers": [
              {
                "identifier": "edmo:3572",
                "scheme": "edmo"
              }
            ],
            "name": "Michigan State University"
          }
        ],
        "person_or_org": {
          "family_name": "Chitwood",
          "given_name": "Daniel",
          "identifiers": [
            {
              "identifier": "0000-0003-4875-1447",
              "scheme": "orcid"
            }
          ],
          "name": "Chitwood, Daniel",
          "type": "personal"
        },
        "role": {
          "id": "datacurator",
          "title": {
            "de": "DatenkuratorIn",
            "en": "Data curator"
          }
        }
      }
    ],
    "description": "<p><code>DETECT_LEAVES/</code><br>\u251c\u2500\u2500 <code>0_resize_LEAF_DATA.py</code>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp;# Script to standardize image dimensions<br>\u251c\u2500\u2500 <code>1_prepare_training_data.py</code> &nbsp; &nbsp; # Script to format data for YOLO training<br>\u251c\u2500\u2500 <code>2_train_yolov26n-seg.py</code> &nbsp; &nbsp; &nbsp; &nbsp;# Model training script<br>\u251c\u2500\u2500 <code>3_infer_LEAF_DATA.py</code> &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; # Bulk inference on the LEAF_DATA directory<br>\u251c\u2500\u2500 <code>4_infer_training_data.py</code> &nbsp; &nbsp; &nbsp; # Inference on training data for next phase<br>\u251c\u2500\u2500 <code>5_finalize_metadata.py</code> &nbsp; &nbsp; &nbsp; &nbsp; # Merges image metadata with segmentation results<br>\u251c\u2500\u2500 <code>6_figures_and_tables.py</code> &nbsp; &nbsp; &nbsp; &nbsp;# Generates final summary stats and panel figures<br>\u251c\u2500\u2500 <code>dataset.yaml</code> &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; # YOLO configuration file for class names and paths<br>\u251c\u2500\u2500 <code>metadata_info/</code>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; # Directory containing variety keys and mapping CSVs for each dataset<br>\u251c\u2500\u2500 <code>training_data/</code> &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; # Raw annotated images and labels used for model training<br>\u251c\u2500\u2500 <code>yolo26n-seg.pt</code>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; # The weights of the trained segmentation model<br>\u251c\u2500\u2500 <code>runs/</code> &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;# Training logs and model performance metrics, including best.pt<br>\u2514\u2500\u2500 <code>outputs/</code>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; # Resulting data, figures, and summaries<br>&nbsp; &nbsp; \u251c\u2500\u2500 
<code>figures/</code> &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; # Final panel figures and master statistics table<br>&nbsp; &nbsp; \u251c\u2500\u2500 <code>inference_full/</code>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;# Inference on reduced size images<br>&nbsp; &nbsp; \u251c\u2500\u2500 <code>infer_training_data/</code>&nbsp; &nbsp; &nbsp; # Inference results from the training dataset for the next phase<br>&nbsp; &nbsp; \u251c\u2500\u2500 <code>model_training_data/</code> &nbsp; &nbsp; &nbsp; # Splits (train/val/test) and training stats for YOLO model<br>&nbsp; &nbsp; \u2514\u2500\u2500 <code>reduced_training_data/</code>&nbsp; &nbsp; # Reduction of training data for model resolution</p>\n<p><code>LEAF_DATA/</code> # Data to infer on<br>\u251c\u2500\u2500 <code>ACADIA/</code> &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; # <a href=\"https://zenodo.org/records/18106125\">https://zenodo.org/records/18106125</a><br>\u251c\u2500\u2500 <code>ALGERIA/</code>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;# <a href=\"https://zenodo.org/records/16883367\">https://zenodo.org/records/16883367</a><br>\u251c\u2500\u2500 <code>BALEARIC_ISLANDS/</code>&nbsp; &nbsp;<br>&nbsp; &nbsp; \u251c\u2500\u2500 <code>Images_to_predict/</code> &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; # <a href=\"https://zenodo.org/records/19552346\">https://zenodo.org/records/19552346</a><br>&nbsp; &nbsp; \u251c\u2500\u2500 <code>Vitis Balearic additional scan_2018/</code> # <a href=\"https://zenodo.org/records/18391777\">https://zenodo.org/records/18391777</a><br>&nbsp; &nbsp; \u2514\u2500\u2500 <code>Vitis Balearic additional scans_2017/</code> # <a href=\"https://zenodo.org/records/18391777\">https://zenodo.org/records/18391777</a><br>\u251c\u2500\u2500 <code>CALIFORNIA/</code>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; # <a href=\"https://zenodo.org/records/16883236\">https://zenodo.org/records/16883236</a><br>\u251c\u2500\u2500 <code>CHAMBOURCIN/</code>&nbsp; 
&nbsp; &nbsp; &nbsp; &nbsp;# <a href=\"https://zenodo.org/records/19655877\">https://zenodo.org/records/19655877</a><br>\u251c\u2500\u2500 <code>COTTON/</code>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; # <a href=\"https://zenodo.org/records/19655940\">https://zenodo.org/records/19655940</a><br>\u251c\u2500\u2500 <code>GENEVA/</code>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; # <a href=\"https://zenodo.org/records/16882315\">https://zenodo.org/records/16882315</a><br>\u251c\u2500\u2500 <code>HORIZONxILLINOIS/</code>&nbsp; &nbsp; # <a href=\"https://zenodo.org/records/19655877\">https://zenodo.org/records/19655877</a><br>\u251c\u2500\u2500 <code>MISION/</code> &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; # <a href=\"https://zenodo.org/records/19634169\">https://zenodo.org/records/19634169</a><br>\u251c\u2500\u2500 <code>TAMU/</code>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; # <a href=\"https://zenodo.org/records/17834601\">https://zenodo.org/records/17834601</a><br>\u251c\u2500\u2500 <code>UCDAVIS/</code>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;# <a href=\"https://zenodo.org/records/16883382\">https://zenodo.org/records/16883382</a><br>\u251c\u2500\u2500 <code>VITIS_CROSSES/</code>&nbsp; &nbsp; &nbsp; &nbsp;# <a href=\"https://zenodo.org/records/16882165\">https://zenodo.org/records/16882165</a><br>\u2514\u2500\u2500 <code>WOLFSKILL/</code>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;# <a href=\"https://zenodo.org/records/16882127\">https://zenodo.org/records/16882127</a></p>\n<p><code>REDUCED_LEAF_DATA/</code>&nbsp; &nbsp; &nbsp; &nbsp;# LEAF_DATA images reduced to 1024 px on longest dimension<br><code>LEAF_DATA_METADATA.csv</code>&nbsp; &nbsp;# Metadata for REDUCED_LEAF_DATA images</p>\n<p><br><code>LEAF_MASKS/</code>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;# Individually segmented leaf masks</p>\n<p>\u251c\u2500\u2500 <code>ACADIA/</code> &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; # <a 
href=\"https://zenodo.org/records/20091083\">https://zenodo.org/records/20091083</a><br>\u251c\u2500\u2500 <code>ALGERIA/</code>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;# <a href=\"https://zenodo.org/records/20091083\">https://zenodo.org/records/20091083</a><br>\u251c\u2500\u2500 <code>BALEARIC_ISLANDS/</code>&nbsp; &nbsp;# <a href=\"https://zenodo.org/records/20091083\">https://zenodo.org/records/20091083</a><br>\u251c\u2500\u2500 <code>CALIFORNIA/</code>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; # <a href=\"https://zenodo.org/records/20091083\">https://zenodo.org/records/20091083</a><br>\u251c\u2500\u2500 <code>CHAMBOURCIN/</code>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp;# <a href=\"https://zenodo.org/records/20091083\">https://zenodo.org/records/20091083</a><br>\u251c\u2500\u2500 <code>COTTON/</code>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; # <a href=\"https://zenodo.org/records/20091083\">https://zenodo.org/records/20091083</a><br>\u251c\u2500\u2500 <code>GENEVA/</code>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; # <a href=\"https://zenodo.org/records/20091262\">https://zenodo.org/records/20091262</a><br>\u251c\u2500\u2500 <code>HORIZONxILLINOIS/</code>&nbsp; &nbsp; # <a href=\"https://zenodo.org/records/20091083\">https://zenodo.org/records/20091083</a><br>\u251c\u2500\u2500 <code>MISION/</code> &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; # <a href=\"https://zenodo.org/records/20091083\">https://zenodo.org/records/20091083</a><br>\u251c\u2500\u2500 <code>TAMU/</code>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; # <a href=\"https://zenodo.org/records/20091083\">https://zenodo.org/records/20091083</a><br>\u251c\u2500\u2500 <code>UCDAVIS/</code>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;# <a href=\"https://zenodo.org/records/20091083\">https://zenodo.org/records/20091083</a><br>\u251c\u2500\u2500 <code>VITIS_CROSSES/</code>&nbsp; &nbsp; &nbsp; &nbsp;# <a 
href=\"https://zenodo.org/records/20091083\">https://zenodo.org/records/20091083</a><br>\u2514\u2500\u2500 <code>WOLFSKILL/</code>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;# <a href=\"https://zenodo.org/records/20091262\">https://zenodo.org/records/20091262</a></p>\n<p><br><code>LEAF_DETECTION_SUMMARY.csv</code> # Summary of number of segmented leaves per dataset<br><code>LEAF_MASK_METADATA.csv</code> &nbsp; &nbsp;# Metadata linking cropped leaf masks to original images<br><code>MASTER_LEAF_METADATA.csv</code> &nbsp;# Master metadata file with IDs and metadata for each leaf</p>\n<p><strong>Image Standardization and Training Data Preparation</strong></p>\n<p><code>0_resize_LEAF_DATA.py</code> and <code>1_prepare_training_data.py</code></p>\n<p>Leaf image datasets originating from multiple published repositories and institutional collections were consolidated into a unified processing framework. To standardize computational requirements across datasets while preserving geometric fidelity, all source images were resized such that the longest image dimension equaled 1024 pixels while maintaining aspect ratio. Images were converted to JPEG format and assigned unique identifiers using a balanced round-robin sampling scheme across datasets to minimize ordering biases during downstream processing. For each image, metadata including original dimensions, resized dimensions, source dataset, original file path, and scaling ratio were recorded in a master metadata table to enable exact coordinate projection between reduced-resolution and original-resolution image space.</p>\n<p>Training data for segmentation were assembled from heterogeneous annotation formats spanning multiple grapevine and plant morphology datasets. Dataset-specific directory structures and naming conventions were harmonized through automated parsing and keyword-based dataset mapping. 
Polygon trace annotations corresponding to individual leaf blades were identified and consolidated for each image.</p>\n<p>To reduce overrepresentation of disproportionately large datasets, a dynamic capping strategy was implemented in which the maximum image count among non-Vitis crosses datasets was used as a balancing threshold. Images were randomly shuffled prior to selection to minimize sampling bias. Summary statistics including image counts, leaf counts, leaves per image, and median megapixel size were computed for each dataset.</p>\n<p>All training images were resized to 1024 pixels on the longest axis using Lanczos resampling. Corresponding polygon annotations were converted into normalized YOLO segmentation format, where vertex coordinates were expressed relative to original image width and height. Consolidated annotation files containing all leaf polygons per image were generated for model training. To facilitate quality control, visualization overlays showing resized images with projected polygon masks were automatically produced for all processed samples.</p>\n<p>Processed datasets were partitioned into training (80%), validation (10%), and testing (10%) subsets following random shuffling with a fixed seed for reproducibility. Images and normalized polygon annotations were organized into standard YOLO directory structures for downstream segmentation model training.</p>\n<p><strong>Segmentation Model Training</strong></p>\n<p><code>2_train_yolov26n-seg.py</code></p>\n<p>A YOLOv26 nano segmentation architecture was trained using the standardized 1024-pixel image dataset and normalized polygon annotations generated during preprocessing. Model training was performed using the Ultralytics implementation of YOLO segmentation with a target image resolution of 1024 pixels and a batch size of eight images. 
Training proceeded for up to 500 epochs with early stopping enabled after 50 epochs without improvement.</p>\n<p>To maximize robustness to variable specimen orientation and imaging conditions across historical and contemporary botanical collections, extensive rotational augmentation was applied during training. Augmentation included random rotations spanning the full 360&deg; range (&minus;180&deg; to +180&deg;) in combination with independent horizontal and vertical image flipping. These augmentations were designed to ensure orientation-invariant leaf segmentation performance across heterogeneous datasets. Model training was resumed automatically from prior checkpoints when available to support long-duration iterative optimization and reproducibility.</p>\n<p><strong>Automated Leaf Segmentation and Individual Leaf Harvesting</strong></p>\n<p><code>3_infer_LEAF_DATA.py</code></p>\n<p>The trained YOLOv26 segmentation model was applied to the full reduced-resolution image collection to identify and segment individual leaf blades. Inference was conducted on 1024-pixel standardized images using a confidence threshold of 0.5. Predicted segmentation masks were then projected from reduced-resolution image space back into original full-resolution coordinates using the scaling metadata generated during preprocessing.</p>\n<p>For each detected leaf, polygon masks were converted into high-resolution binary alpha masks and used to extract individual leaf images from the original source image. Bounding boxes were computed from projected polygon coordinates, and square crops centered on each leaf were generated with an additional 10% padding margin to preserve blade margins and minimize edge truncation. 
When crop boundaries extended beyond image borders, zero-padding was applied to maintain consistent square geometry.</p>\n<p>Each extracted leaf was saved as a 1024 &times; 1024 RGBA image containing both RGB pixel information and a transparent alpha mask representing the segmented blade. Metadata describing each harvested leaf, including parent image identifier, dataset origin, original image path, original-scale bounding box coordinates, and crop transformation parameters, were recorded in a master metadata table. These transformation parameters enabled exact reprojection between cropped leaf images and original parent images for subsequent downstream analyses and iterative segmentation workflows.</p>\n<p>To support large-scale processing and fault tolerance, inference was implemented with resumable execution and incremental metadata writing. Dataset-level summary statistics describing the number of processed images and total segmented leaves were automatically generated following completion.</p>\n<p><strong>Harmonized Inference on Training Data</strong></p>\n<p><code>4_infer_training_data.py</code></p>\n<p>To ensure that downstream analyses operated on consistently segmented leaf geometries, the trained segmentation model was also applied to the original training datasets using the identical inference and cropping pipeline employed for the full inferred dataset. This procedure standardized blade extraction between annotated training material and newly segmented images, thereby minimizing systematic differences introduced by manual versus automated blade delineation.</p>\n<p>Original training datasets containing paired blade and vein annotations were first resized to standardized 1024-pixel resolution while preserving aspect ratio. 
Reduced-resolution copies of images and scaled annotation coordinates were generated alongside metadata linking resized images to original source files and scaling factors.</p>\n<p>The trained YOLOv26 segmentation model was then applied to the reduced training images. Predicted segmentation masks were projected back into original-resolution coordinate space and matched to manually annotated blade polygons using intersection-over-union (IoU) overlap scores. For each inferred mask, the manually annotated leaf with the highest IoU correspondence was identified, and low-overlap matches (IoU &lt; 0.1) were excluded.</p>\n<p>Matched leaves were subsequently harvested from original full-resolution images using the same square-cropping, padding, alpha masking, and 1024 &times; 1024 normalization procedures used during inference on the broader dataset. Corresponding manually annotated blade and vein coordinates were transformed into the cropped image coordinate system and rescaled to the final standardized image dimensions. This produced harmonized datasets in which both inferred leaves and original training leaves shared identical geometric preprocessing and segmentation-derived boundaries.</p>\n<p>To facilitate validation and quality control, visualization overlays displaying projected blade and vein annotations on final cropped leaf images were automatically generated. Final manifests recording matched leaf identifiers and IoU correspondence scores were exported for downstream traceability and reproducibility.</p>\n<p><strong>Metadata Integration and Harmonization</strong></p>\n<p><code>5_finalize_metadata.py</code></p>\n<p>Following large-scale leaf segmentation and harvesting, a final metadata integration stage was performed to unify image-derived leaf records with dataset-specific biological and experimental metadata. 
Metadata were aggregated across all contributing collections using a combination of directory traversal, filename parsing, accession mapping tables, and dataset-specific lookup files.</p>\n<p>For each source dataset, custom parsers were implemented to extract biologically relevant metadata embedded within filenames, directory structures, or auxiliary key files. Depending on dataset availability, extracted metadata included cultivar or species identity, accession identifiers, rootstock information, geographic location, collection year, grape color, abaxial versus adaxial imaging orientation, breeding population, and usage classification (e.g., wine, table, wild, or cross). Additional variety classification tables were incorporated to annotate cultivar usage types when available.</p>\n<p>To enable reliable integration across heterogeneous datasets, source image paths were normalized into standardized relative path representations prior to joining. Leaf-level segmentation metadata generated during inference were then merged with image-level biological metadata using relative source image paths as join keys. Records corresponding to images in which no leaves were detected were excluded from the final integrated dataset.</p>\n<p>The resulting master metadata table linked each harvested leaf image to its parent image, dataset of origin, original image coordinates, crop transformation parameters, and associated biological metadata. Final outputs were standardized to portable relative path structures to facilitate reproducibility and cross-platform portability. 
The completed master metadata table provided a unified relational framework connecting segmented leaf masks with original specimen context and experimental metadata across all datasets.</p>\n<p><strong>Summary Tables and Figure Generation</strong></p>\n<p><code>6_figures_and_tables.py</code></p>\n<p>Automated summary statistics and publication-ready visualization panels were generated from the processed training, inference, and segmentation outputs. Dataset-level summary tables were constructed by integrating statistics from preprocessing, model partitioning, and full-dataset inference stages.</p>\n<p>For each dataset, summary metrics included median training image size (megapixels), number of annotated training leaves, counts of training, validation, and testing images, total images processed during inference, and the total number of segmented leaves detected. Statistics from multiple processing stages were merged into a unified master table and exported in both comma-separated value (CSV) and Markdown formats to support downstream reporting and manuscript preparation.</p>\n<p>To visually summarize segmentation performance and dataset diversity, composite figure panels were automatically assembled from representative quality-control outputs. Training-data visualizations displaying projected annotation polygons on resized images and full inference outputs from automated leaf segmentation were organized into a two-panel grid layout. The upper panel displayed representative training annotation overlays, whereas the lower panel displayed representative segmentation outputs generated during large-scale inference. 
Figures were exported in both high-resolution PNG and vector PDF formats suitable for publication.</p>\n<p>The automated generation of statistical summaries and visualization panels provided a reproducible framework for documenting dataset composition, model training distributions, segmentation outputs, and quality-control assessments across the complete computational pipeline.</p>",
    "publication_date": "2026-05-08",
    "publisher": "Zenodo",
    "resource_type": {
      "id": "dataset",
      "title": {
        "de": "Datensatz",
        "en": "Dataset"
      }
    },
    "rights": [
      {
        "description": {
          "en": "The Creative Commons Attribution license allows re-distribution and re-use of a licensed work on the condition that the creator is appropriately credited."
        },
        "icon": "cc-by-icon",
        "id": "cc-by-4.0",
        "props": {
          "scheme": "spdx",
          "url": "https://creativecommons.org/licenses/by/4.0/legalcode"
        },
        "title": {
          "en": "Creative Commons Attribution 4.0 International"
        }
      }
    ],
    "title": "DETECT LEAVES"
  },
  "parent": {
    "access": {
      "owned_by": {
        "user": "121621"
      },
      "settings": {
        "accept_conditions_text": null,
        "allow_guest_requests": false,
        "allow_user_requests": false,
        "secret_link_expiration": 0
      }
    },
    "communities": {
      "default": "a1f70060-e373-47eb-88dd-54a55772d534",
      "entries": [
        {
          "access": {
            "member_policy": "open",
            "members_visibility": "public",
            "record_submission_policy": "open",
            "review_policy": "closed",
            "visibility": "public"
          },
          "children": {
            "allow": false
          },
          "created": "2025-08-15T12:59:41.255494+00:00",
          "custom_fields": {},
          "deletion_status": {
            "is_deleted": false,
            "status": "P"
          },
          "id": "a1f70060-e373-47eb-88dd-54a55772d534",
          "links": {},
          "metadata": {
            "curation_policy": "",
            "description": "A repository of 8,993 images of grapevine leaves, over 24,900 segmented masks, and the instance and pixel segmentation CNN models that produced them",
            "page": "<h3>Project Overview</h3>\n<p>This collection represents the complete, fully reproducible pipeline for a single project focused on the instance and pixel segmentation of grapevine leaves. It includes a total of&nbsp;<strong>8,993 original images</strong> and the corresponding <strong>24,940 segmented leaf masks</strong>. The collection also contains the instance and pixel segmentation CNN models used to produce these masks. The individual Zenodo uploads correspond to the major project directories and their first-level contents.</p>\n\n\n<h3>Project File Structure</h3>\n<p></p>\n<div>\n<div>\n<div>\n<pre><code>.\n\u251c\u2500\u2500 LEAF_DATA/\n\u2502   \u251c\u2500\u2500 ALGERIA/\n\u2502   \u251c\u2500\u2500 CALIFORNIA/\n\u2502   \u251c\u2500\u2500 GENEVA/\n\u2502   \u251c\u2500\u2500 UCDAVIS/\n\u2502   \u251c\u2500\u2500 VITIS_CROSSES/\n\u2502   \u2514\u2500\u2500 WOLFSKILL/\n\u2502\n\u251c\u2500\u2500 FINAL_MASKS/\n\u2502   \u251c\u2500\u2500 png_masks/\n\u2502   \u251c\u2500\u2500 master_masks.h5\n\u2502   \u2514\u2500\u2500 master_mask_datasheet.csv\n\u2502\n\u2514\u2500\u2500 LEAF_SEGMENTATION/\n    \u251c\u2500\u2500 models/\n    \u2514\u2500\u2500 scripts/</code></pre>\n</div>\n</div>\n</div>",
            "title": "Grapevine leaves",
            "type": {
              "id": "project"
            }
          },
          "revision_id": 6,
          "slug": "grapevine_leaves",
          "updated": "2025-08-15T21:07:25.791602+00:00"
        }
      ],
      "ids": [
        "a1f70060-e373-47eb-88dd-54a55772d534"
      ]
    },
    "id": "20090332",
    "pids": {
      "doi": {
        "client": "datacite",
        "identifier": "10.5281/zenodo.20090332",
        "provider": "datacite"
      }
    }
  },
  "pids": {
    "doi": {
      "client": "datacite",
      "identifier": "10.5281/zenodo.20090333",
      "provider": "datacite"
    },
    "oai": {
      "identifier": "oai:zenodo.org:20090333",
      "provider": "oai"
    }
  },
  "revision_id": 24,
  "stats": {
    "all_versions": {
      "data_volume": 78291645832.0,
      "downloads": 65,
      "unique_downloads": 65,
      "unique_views": 64,
      "views": 89
    },
    "this_version": {
      "data_volume": 78291645832.0,
      "downloads": 65,
      "unique_downloads": 65,
      "unique_views": 64,
      "views": 89
    }
  },
  "status": "published",
  "swh": {},
  "updated": "2026-05-09T01:06:46.638268+00:00",
  "versions": {
    "index": 1,
    "is_latest": true
  }
}