{
  "access": {
    "embargo": {
      "active": false,
      "reason": null
    },
    "files": "public",
    "record": "public",
    "status": "open"
  },
  "created": "2026-04-22T00:49:04.933712+00:00",
  "custom_fields": {
    "legacy:communities": [
      "f312c6b6-0dd6-4fb2-a224-7c4960394ac1",
      "f312c6b6-0dd6-4fb2-a224-7c4960394ac1",
      "dockstore"
    ]
  },
  "deletion_status": {
    "is_deleted": false,
    "status": "P"
  },
  "files": {
    "count": 1,
    "enabled": true,
    "entries": {
      "github.com-aofarrel-tree_nine-tree_nine_0.6.3.zip": {
        "access": {
          "hidden": false
        },
        "checksum": "md5:c08de7eae3658157148fefc9262a3656",
        "ext": "zip",
        "id": "c4629d24-7128-4853-888d-abea2d7daa3c",
        "key": "github.com-aofarrel-tree_nine-tree_nine_0.6.3.zip",
        "links": {
          "content": "https://zenodo.org/api/records/19687896/files/github.com-aofarrel-tree_nine-tree_nine_0.6.3.zip/content",
          "self": "https://zenodo.org/api/records/19687896/files/github.com-aofarrel-tree_nine-tree_nine_0.6.3.zip"
        },
        "metadata": {},
        "mimetype": "application/zip",
        "size": 15996,
        "storage_class": "L"
      }
    },
    "order": [],
    "total_bytes": 15996
  },
  "id": "19687896",
  "is_draft": false,
  "is_published": true,
  "links": {
    "access": "https://zenodo.org/api/records/19687896/access",
    "access_grants": "https://zenodo.org/api/records/19687896/access/grants",
    "access_links": "https://zenodo.org/api/records/19687896/access/links",
    "access_request": "https://zenodo.org/api/records/19687896/access/request",
    "access_users": "https://zenodo.org/api/records/19687896/access/users",
    "archive": "https://zenodo.org/api/records/19687896/files-archive",
    "archive_media": "https://zenodo.org/api/records/19687896/media-files-archive",
    "communities": "https://zenodo.org/api/records/19687896/communities",
    "communities-suggestions": "https://zenodo.org/api/records/19687896/communities-suggestions",
    "doi": "https://doi.org/10.5281/zenodo.19687896",
    "draft": "https://zenodo.org/api/records/19687896/draft",
    "file_modification": "https://zenodo.org/api/records/19687896/file-modification",
    "files": "https://zenodo.org/api/records/19687896/files",
    "latest": "https://zenodo.org/api/records/19687896/versions/latest",
    "latest_html": "https://zenodo.org/records/19687896/latest",
    "media_files": "https://zenodo.org/api/records/19687896/media-files",
    "parent": "https://zenodo.org/api/records/15046888",
    "parent_doi": "https://doi.org/10.5281/zenodo.15046888",
    "parent_doi_html": "https://zenodo.org/doi/10.5281/zenodo.15046888",
    "parent_html": "https://zenodo.org/records/15046888",
    "preview_html": "https://zenodo.org/records/19687896?preview=1",
    "quota_increase": "https://zenodo.org/api/records/19687896/quota-increase",
    "request_deletion": "https://zenodo.org/api/records/19687896/request-deletion",
    "requests": "https://zenodo.org/api/records/19687896/requests",
    "reserve_doi": "https://zenodo.org/api/records/19687896/draft/pids/doi",
    "self": "https://zenodo.org/api/records/19687896",
    "self_doi": "https://doi.org/10.5281/zenodo.19687896",
    "self_doi_html": "https://zenodo.org/doi/10.5281/zenodo.19687896",
    "self_html": "https://zenodo.org/records/19687896",
    "self_iiif_manifest": "https://zenodo.org/api/iiif/record:19687896/manifest",
    "self_iiif_sequence": "https://zenodo.org/api/iiif/record:19687896/sequence/default",
    "versions": "https://zenodo.org/api/records/19687896/versions"
  },
  "media_files": {
    "count": 0,
    "enabled": false,
    "entries": {},
    "order": [],
    "total_bytes": 0
  },
  "metadata": {
    "creators": [
      {
        "affiliations": [
          {
            "name": "University of California Santa Cruz"
          }
        ],
        "person_or_org": {
          "family_name": "Ash O'Farrell",
          "identifiers": [
            {
              "identifier": "0000-0003-4896-1858",
              "scheme": "orcid"
            }
          ],
          "name": "Ash O'Farrell",
          "type": "personal"
        }
      }
    ],
    "description": "<h1>Tree Nine</h1>\n<p>Put diff files on an existing phylogenetic tree using <a href=\"https://www.nature.com/articles/s41588-021-00862-7\">UShER</a>'s <code>usher sampled</code> task with a bit of help from <a href=\"https://www.github.com/aofarrel/SRANWRP\">SRANWRP</a>, followed by conversion of that tree to Taxonium, Newick, and Nextstrain formats. Samples' SNP distance is calculated and output as a distance matrix, and samples will be placed into clusters based on the distance.</p>\n<p>Verified on Terra-Cromwell and miniwdl. Make sure to add <code>--copy-input-files</code> for miniwdl. Default inputs assume you're working with <em>Mycobacterium tuberculosis</em>, be sure to change them if you aren't working with that bacterium.</p>\n<p>This repo also contains the following subworkflows:</p>\n<ul>\n<li><a href=\"./annotate.md\">Annotate</a></li>\n<li><a href=\"./convert_to_nextstrain.md\">Convert to Nextstrain</a> (for viewing in Auspice, non-clade sample annotations, etc)</li>\n<li><a href=\"./extract.md\">Extract</a></li>\n<li><a href=\"./mask_tree.wdl\">Mask tree</a></li>\n<li><a href=\"./mask_subtree.wdl\">Mask subtree</a></li>\n<li><a href=\"./summarize.md\">Summarize</a></li>\n</ul>\n<h2>features</h2>\n<ul>\n<li>Highly scalable, even on lower-end computes</li>\n<li>Can input a single pre-combined diff file</li>\n<li>Includes a sample input tree created from SRA data if no input tree is specified</li>\n<li>Trees automatically converted to UShER (.pb), Taxonium (.jsonl.gz), Newick (.nwk), and Nextstrain (.json) formats</li>\n<li>Automatic clustering based on configurable genetic distance\n<ul>\n<li>Nextstrain tree(s) will be annotated by cluster</li>\n<li>Clustering can be limited to only samples specified by the user, all newly added samples, or all samples</li>\n<li>Clustering is also performed after backmasking</li>\n<li>(optional) Create per-cluster Nextstrain subtrees</li>\n</ul>\n</li>\n<li>(optional) Reroot the tree to a specified node</li>\n<li>(optional) Backmask newly-added samples against each other to hide positions where any newly-added sample lacks data, then create a new set of trees based on the backmasked diff files\n<ul>\n<li>Designed for highly clonal samples which have a plausible direct epidemiological relationship</li>\n<li>Backmasking can only be performed on samples which have sample-level diff files</li>\n</ul>\n</li>\n<li>(optional) Summarize input, reroot, and output trees with matutils</li>\n<li>(optional) Filter out positions by coverage at that position and/or entire samples by overall coverage</li>\n<li>(optional) Specify your own reference genome if you don't want to work with H37Rv</li>\n<li>(optional) Annotate clades via matutils with a specified annotation TSV</li>\n</ul>\n<h2>benchmarking</h2>\n<p>Formal benchmarks have not been established, but a full run of placing 60 new TB samples on an existing 7000+ TB sample tree, conversion to taxonium and newick formats, distance matrixing, cluster finding, and creating cluster-specific Nextstrain trees executes in about five minutes on a 2019 MacBook Pro.</p>\n<p>Backmasking is the least scalable part of the pipeline. The comparison itself theoretically scales <i>n<sup>2</sup></i> and once the comparison is completed, <i>n</i> backmasked diff files must be written to disk. We have observed that memory problems tend to arise during the file-writing part when <i>n\u226555</i> on a local machine. Runtime attributes are adjustable as task-level variables to aid with scaling on cloud backends, although we have seen the default handle 60 samples at a time without much issue.</p>",
    "publication_date": "2026-04-22",
    "publisher": "Zenodo",
    "related_identifiers": [
      {
        "identifier": "https://dockstore.org/aliases/workflow-versions/10.5281-zenodo.19687896",
        "relation_type": {
          "id": "isidenticalto",
          "title": {
            "de": "Ist identisch mit",
            "en": "Is identical to"
          }
        },
        "scheme": "url"
      },
      {
        "identifier": "https://dockstore.org/workflows/github.com/aofarrel/tree_nine/tree_nine:0.6.3",
        "relation_type": {
          "id": "isidenticalto",
          "title": {
            "de": "Ist identisch mit",
            "en": "Is identical to"
          }
        },
        "scheme": "url"
      },
      {
        "identifier": "https://dockstore.org/api/ga4gh/trs/v2/tools/%23workflow%2Fgithub.com%2Faofarrel%2Ftree_nine%2Ftree_nine/versions/0.6.3/PLAIN-WDL/descriptor/tree_nine.wdl",
        "relation_type": {
          "id": "isidenticalto",
          "title": {
            "de": "Ist identisch mit",
            "en": "Is identical to"
          }
        },
        "scheme": "url"
      }
    ],
    "resource_type": {
      "id": "software",
      "title": {
        "de": "Software",
        "en": "Software"
      }
    },
    "rights": [
      {
        "description": {
          "en": "The Creative Commons Attribution license allows re-distribution and re-use of a licensed work on the condition that the creator is appropriately credited."
        },
        "icon": "cc-by-icon",
        "id": "cc-by-4.0",
        "props": {
          "scheme": "spdx",
          "url": "https://creativecommons.org/licenses/by/4.0/legalcode"
        },
        "title": {
          "en": "Creative Commons Attribution 4.0 International"
        }
      }
    ],
    "subjects": [
      {
        "subject": "auspice"
      },
      {
        "subject": "nextstrain"
      },
      {
        "subject": "pathogen"
      },
      {
        "subject": "phylogenetics"
      },
      {
        "subject": "taxonium"
      },
      {
        "subject": "usher"
      }
    ],
    "title": "github.com/aofarrel/tree_nine/tree_nine",
    "version": "0.6.3"
  },
  "parent": {
    "access": {
      "owned_by": {
        "user": "76183"
      },
      "settings": {
        "accept_conditions_text": null,
        "allow_guest_requests": false,
        "allow_user_requests": false,
        "secret_link_expiration": 0
      }
    },
    "communities": {
      "default": "f312c6b6-0dd6-4fb2-a224-7c4960394ac1",
      "entries": [
        {
          "access": {
            "member_policy": "open",
            "members_visibility": "public",
            "record_submission_policy": "open",
            "review_policy": "open",
            "visibility": "public"
          },
          "children": {
            "allow": false
          },
          "created": "2024-11-05T20:16:09.639062+00:00",
          "custom_fields": {},
          "deletion_status": {
            "is_deleted": false,
            "status": "P"
          },
          "id": "f312c6b6-0dd6-4fb2-a224-7c4960394ac1",
          "links": {},
          "metadata": {
            "description": "Dockstore is a free and open source platform for sharing reusable and scalable analytical tools and workflows.",
            "title": "Dockstore",
            "type": {
              "id": "project"
            },
            "website": "https://dockstore.org/"
          },
          "revision_id": 5,
          "slug": "dockstore",
          "updated": "2024-11-05T20:17:15.254457+00:00"
        }
      ],
      "ids": [
        "f312c6b6-0dd6-4fb2-a224-7c4960394ac1"
      ]
    },
    "id": "15046888",
    "pids": {
      "doi": {
        "client": "datacite",
        "identifier": "10.5281/zenodo.15046888",
        "provider": "datacite"
      }
    }
  },
  "pids": {
    "doi": {
      "client": "datacite",
      "identifier": "10.5281/zenodo.19687896",
      "provider": "datacite"
    },
    "oai": {
      "identifier": "oai:zenodo.org:19687896",
      "provider": "oai"
    }
  },
  "revision_id": 4,
  "stats": {
    "all_versions": {
      "data_volume": 2875264.0,
      "downloads": 223,
      "unique_downloads": 219,
      "unique_views": 770,
      "views": 777
    },
    "this_version": {
      "data_volume": 0.0,
      "downloads": 0,
      "unique_downloads": 0,
      "unique_views": 34,
      "views": 35
    }
  },
  "status": "published",
  "swh": {},
  "updated": "2026-04-22T00:49:05.099437+00:00",
  "versions": {
    "index": 20,
    "is_latest": false
  }
}