{
  "access": {
    "embargo": {
      "active": false,
      "reason": null
    },
    "files": "public",
    "record": "public",
    "status": "open"
  },
  "created": "2021-05-12T06:07:22.808374+00:00",
  "custom_fields": {},
  "deletion_status": {
    "is_deleted": false,
    "status": "P"
  },
  "files": {
    "count": 1,
    "enabled": true,
    "entries": {
      "DravidianCodeMix-2020.zip": {
        "checksum": "md5:7850be52919a387f5b36c7a09b05ad87",
        "ext": "zip",
        "id": "3654ff20-8183-4803-a2c8-19a0f6a2892d",
        "key": "DravidianCodeMix-2020.zip",
        "metadata": null,
        "mimetype": "application/zip",
        "size": 10793728
      }
    },
    "order": [],
    "total_bytes": 10793728
  },
  "id": "4750858",
  "is_draft": false,
  "is_published": true,
  "links": {
    "access": "https://zenodo.org/api/records/4750858/access",
    "access_grants": "https://zenodo.org/api/records/4750858/access/grants",
    "access_links": "https://zenodo.org/api/records/4750858/access/links",
    "access_request": "https://zenodo.org/api/records/4750858/access/request",
    "access_users": "https://zenodo.org/api/records/4750858/access/users",
    "archive": "https://zenodo.org/api/records/4750858/files-archive",
    "archive_media": "https://zenodo.org/api/records/4750858/media-files-archive",
    "communities": "https://zenodo.org/api/records/4750858/communities",
    "communities-suggestions": "https://zenodo.org/api/records/4750858/communities-suggestions",
    "doi": "https://doi.org/10.5281/zenodo.4750858",
    "draft": "https://zenodo.org/api/records/4750858/draft",
    "files": "https://zenodo.org/api/records/4750858/files",
    "latest": "https://zenodo.org/api/records/4750858/versions/latest",
    "latest_html": "https://zenodo.org/records/4750858/latest",
    "media_files": "https://zenodo.org/api/records/4750858/media-files",
    "parent": "https://zenodo.org/api/records/4750857",
    "parent_doi": "https://zenodo.org/doi/10.5281/zenodo.4750857",
    "parent_html": "https://zenodo.org/records/4750857",
    "requests": "https://zenodo.org/api/records/4750858/requests",
    "reserve_doi": "https://zenodo.org/api/records/4750858/draft/pids/doi",
    "self": "https://zenodo.org/api/records/4750858",
    "self_doi": "https://zenodo.org/doi/10.5281/zenodo.4750858",
    "self_html": "https://zenodo.org/records/4750858",
    "self_iiif_manifest": "https://zenodo.org/api/iiif/record:4750858/manifest",
    "self_iiif_sequence": "https://zenodo.org/api/iiif/record:4750858/sequence/default",
    "versions": "https://zenodo.org/api/records/4750858/versions"
  },
  "media_files": {
    "count": 0,
    "enabled": false,
    "entries": {},
    "order": [],
    "total_bytes": 0
  },
  "metadata": {
    "creators": [
      {
        "affiliations": [
          {
            "name": "National University of Ireland Galway"
          }
        ],
        "person_or_org": {
          "family_name": "Chakravarthi",
          "given_name": "Bharathi Raja",
          "identifiers": [
            {
              "identifier": "0000-0002-4575-7934",
              "scheme": "orcid"
            }
          ],
          "name": "Chakravarthi, Bharathi Raja",
          "type": "personal"
        }
      },
      {
        "affiliations": [
          {
            "name": "ULTRA Arts and Science College, Madurai, Tamil Nadu, India"
          }
        ],
        "person_or_org": {
          "family_name": "Priyadharshini",
          "given_name": "Ruba",
          "name": "Priyadharshini, Ruba",
          "type": "personal"
        }
      },
      {
        "affiliations": [
          {
            "name": "Cardiff University, United Kingdom"
          }
        ],
        "person_or_org": {
          "family_name": "Muralidaran",
          "given_name": "Vigneshwaran",
          "name": "Muralidaran, Vigneshwaran",
          "type": "personal"
        }
      },
      {
        "affiliations": [
          {
            "name": "Indian Institute of Information Technology and Management-Kerala, Kerala, India"
          }
        ],
        "person_or_org": {
          "family_name": "Jose",
          "given_name": "Navya",
          "name": "Jose, Navya",
          "type": "personal"
        }
      },
      {
        "affiliations": [
          {
            "name": "National University of Ireland Galway"
          }
        ],
        "person_or_org": {
          "family_name": "Suryawanshi",
          "given_name": "Shardul",
          "name": "Suryawanshi, Shardul",
          "type": "personal"
        }
      },
      {
        "affiliations": [
          {
            "name": "National University of Ireland Galway"
          }
        ],
        "person_or_org": {
          "family_name": "McCrae",
          "given_name": "John P.",
          "name": "McCrae, John P.",
          "type": "personal"
        }
      }
    ],
    "description": "<p>This paper describes the development of a multilingual, manually annotated dataset for three under-resourced Dravidian languages generated from social media comments. The dataset was annotated for sentiment analysis and offensive language identification for a total of more than 60,000 YouTube comments. The dataset consists of around 44,000 comments in Tamil-English, around 7,000 comments in Kannada-English, and around 20,000 comments in Malayalam-English. The data was manually annotated by volunteer annotators and has a high inter-annotator agreement in Krippendorff&#39;s alpha. The dataset contains all types of code-mixing phenomena since it comprises user-generated content from a multilingual country.&nbsp; We also present baseline experiments to establish benchmarks on the dataset using machine learning methods.</p>\n\n<p>If you are using the data or code from this research then please site our paper below:</p>\n\n<p>@article{chakravarthi-etal-2021-lre,<br>\ntitle = &quot;DravidianCodeMix: Sentiment Analysis and Offensive Language Identification Dataset for Dravidian Languages in Code-Mixed Text&quot;,<br>\nauthor = &quot;Chakravarthi, Bharathi Raja&nbsp; and<br>\n&nbsp; Priyadharshini, Ruba&nbsp; and<br>\n&nbsp; Muralidaran, Vigneshwaran and<br>\n&nbsp; Jose, Navya and<br>\n&nbsp; Suryawanshi, Shardul and<br>\n&nbsp; Sherly, Elizabeth&nbsp; and<br>\n&nbsp; McCrae, John P<br>\n&nbsp; journal={Language Resources and Evaluation},<br>\n&nbsp; year={2021},<br>\n&nbsp; publisher={Springer}<br>\n}</p>\n\n<p>&nbsp;</p>",
    "publication_date": "2021-05-12",
    "publisher": "Zenodo",
    "resource_type": {
      "id": "dataset",
      "title": {
        "de": "Datensatz",
        "en": "Dataset"
      }
    },
    "rights": [
      {
        "description": {
          "en": "The Creative Commons Attribution license allows re-distribution and re-use of a licensed work on the condition that the creator is appropriately credited."
        },
        "icon": "cc-by-icon",
        "id": "cc-by-4.0",
        "props": {
          "scheme": "spdx",
          "url": "https://creativecommons.org/licenses/by/4.0/legalcode"
        },
        "title": {
          "en": "Creative Commons Attribution 4.0 International"
        }
      }
    ],
    "subjects": [
      {
        "subject": "Tamil, Malayalam, Kannada, Dravidian languages, Sentiment Analysis, Offensive langauge identification, Code-mixed, corpora"
      }
    ],
    "title": "DravidianCodeMix: Sentiment Analysis and Offensive Language Identification Dataset for Dravidian Languages in Code-Mixed Text",
    "version": "1.0"
  },
  "parent": {
    "access": {
      "owned_by": {
        "user": "129384"
      }
    },
    "communities": {},
    "id": "4750857",
    "pids": {
      "doi": {
        "client": "datacite",
        "identifier": "10.5281/zenodo.4750857",
        "provider": "datacite"
      }
    }
  },
  "pids": {
    "doi": {
      "client": "datacite",
      "identifier": "10.5281/zenodo.4750858",
      "provider": "datacite"
    },
    "oai": {
      "identifier": "oai:zenodo.org:4750858",
      "provider": "oai"
    }
  },
  "revision_id": 4,
  "stats": {
    "all_versions": {
      "data_volume": 2504144896.0,
      "downloads": 232,
      "unique_downloads": 208,
      "unique_views": 1059,
      "views": 1139
    },
    "this_version": {
      "data_volume": 2428588800.0,
      "downloads": 225,
      "unique_downloads": 201,
      "unique_views": 1039,
      "views": 1117
    }
  },
  "status": "published",
  "updated": "2021-05-12T13:48:15.803534+00:00",
  "versions": {
    "index": 1,
    "is_latest": true
  }
}