Dataset Open Access

Host network traffic time series 2019/01

Jirsik, Tomas; Smeriga, Juraj


DCAT Export

<?xml version='1.0' encoding='utf-8'?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:adms="http://www.w3.org/ns/adms#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dct="http://purl.org/dc/terms/" xmlns:dctype="http://purl.org/dc/dcmitype/" xmlns:dcat="http://www.w3.org/ns/dcat#" xmlns:duv="http://www.w3.org/ns/duv#" xmlns:foaf="http://xmlns.com/foaf/0.1/" xmlns:frapo="http://purl.org/cerif/frapo/" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:gsp="http://www.opengis.net/ont/geosparql#" xmlns:locn="http://www.w3.org/ns/locn#" xmlns:org="http://www.w3.org/ns/org#" xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:prov="http://www.w3.org/ns/prov#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:schema="http://schema.org/" xmlns:skos="http://www.w3.org/2004/02/skos/core#" xmlns:vcard="http://www.w3.org/2006/vcard/ns#" xmlns:wdrs="http://www.w3.org/2007/05/powder-s#">
  <rdf:Description rdf:about="https://doi.org/10.5281/zenodo.2669079">
    <rdf:type rdf:resource="http://www.w3.org/ns/dcat#Dataset"/>
    <dct:type rdf:resource="http://purl.org/dc/dcmitype/Dataset"/>
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://doi.org/10.5281/zenodo.2669079</dct:identifier>
    <foaf:page rdf:resource="https://doi.org/10.5281/zenodo.2669079"/>
    <dct:creator>
      <!-- Creator "Jirsik, Tomas", member of Masaryk University.
           Written as a typed foaf:Agent node, which states the same
           rdf:type as an explicit rdf:type property would. -->
      <foaf:Agent>
        <foaf:name>Jirsik, Tomas</foaf:name>
        <foaf:givenName>Tomas</foaf:givenName>
        <foaf:familyName>Jirsik</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Masaryk University</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <dct:creator>
      <!-- Creator "Smeriga, Juraj", member of Masaryk University.
           Written as a typed foaf:Agent node, which states the same
           rdf:type as an explicit rdf:type property would. -->
      <foaf:Agent>
        <foaf:name>Smeriga, Juraj</foaf:name>
        <foaf:givenName>Juraj</foaf:givenName>
        <foaf:familyName>Smeriga</foaf:familyName>
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>Masaryk University</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </foaf:Agent>
    </dct:creator>
    <dct:title>Host network traffic time series 2019/01</dct:title>
    <!-- Publisher of the dataset: an anonymous foaf:Agent named "Zenodo". -->
    <dct:publisher>
      <foaf:Agent>
        <foaf:name>Zenodo</foaf:name>
      </foaf:Agent>
    </dct:publisher>
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#gYear">2019</dct:issued>
    <dcat:keyword>network traffic</dcat:keyword>
    <dcat:keyword>time series</dcat:keyword>
    <dcat:keyword>host</dcat:keyword>
    <dcat:keyword>clustering</dcat:keyword>
    <dcat:keyword>classification</dcat:keyword>
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2019-05-06</dct:issued>
    <dct:language rdf:resource="http://publications.europa.eu/resource/authority/language/ENG"/>
    <owl:sameAs rdf:resource="https://zenodo.org/record/2669079"/>
    <!-- Additional identifier for this dataset: the Zenodo record URL,
         typed as xsd:anyURI, with the scheme agency given as "url". -->
    <adms:identifier>
      <adms:Identifier>
        <skos:notation rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://zenodo.org/record/2669079</skos:notation>
        <adms:schemeAgency>url</adms:schemeAgency>
      </adms:Identifier>
    </adms:identifier>
    <dct:isVersionOf rdf:resource="https://doi.org/10.5281/zenodo.2669078"/>
    <owl:versionInfo>1.0.0</owl:versionInfo>
    <dct:description>&lt;p&gt;&lt;em&gt;&lt;strong&gt;General info&lt;/strong&gt;&lt;/em&gt;&lt;/p&gt; &lt;p&gt;Dataset&amp;nbsp;was collected over one &lt;strong&gt;month period in January 2019&lt;/strong&gt;. The observation points for the collection of IP flows were located at the borders of the university campus network. The campus university network has /16 CIDR IPv4 network range at disposal and contains various network segments from segments connecting dormitories, over server segments, to a segment containing working stations of university administrative workers. The size of the raw IP flows used to create the dataset was over 860GB. &lt;strong&gt;A host in our dataset is identified by its source IPv4 address. &amp;nbsp;&lt;/strong&gt;&lt;br&gt; &amp;nbsp;&lt;/p&gt; &lt;p&gt;&lt;em&gt;&lt;strong&gt;Variables&lt;/strong&gt;&lt;/em&gt;&lt;/p&gt; &lt;p&gt;The dataset contains the following variables:&lt;/p&gt; &lt;ul&gt; &lt;li&gt;&lt;strong&gt;Aggregations&lt;/strong&gt; - created from five-minute total volumes aggregated&amp;nbsp;over&amp;nbsp;one-hour disjoint windows using&amp;nbsp;mean/max/min aggregation functions &lt;ul&gt; &lt;li&gt;&lt;strong&gt;# of flows (FL) &lt;/strong&gt;- number of flows for a given source IP&amp;nbsp;&lt;/li&gt; &lt;li&gt;&lt;strong&gt;# of packets (PKT)&lt;/strong&gt; -&amp;nbsp;number of packets for a given source IP&lt;/li&gt; &lt;li&gt;&lt;strong&gt;# of bytes (BYT)&lt;/strong&gt; -&amp;nbsp;number of bytes for a given source IP&lt;/li&gt; &lt;li&gt;&lt;strong&gt;flow duration (DUR)&lt;/strong&gt; - average flow duration in seconds&lt;/li&gt; &lt;/ul&gt; &lt;/li&gt; &lt;li&gt;&lt;strong&gt;Distinct Counts&amp;nbsp;&lt;/strong&gt;- count of distinct values for each variable in five-minute window aggregated&amp;nbsp;over&amp;nbsp;one-hour disjoint windows using&amp;nbsp;mean/max/min aggregation functions &lt;ul&gt; &lt;li&gt;&lt;strong&gt;# of peers (PEER)&lt;/strong&gt; - number of distinct communication peers for a given 
source IP&lt;/li&gt; &lt;li&gt;&lt;strong&gt;# of ports (PORTS)&lt;/strong&gt; - number of distinct destination ports&amp;nbsp;for a given source IP&lt;/li&gt; &lt;li&gt;&lt;strong&gt;# of protocols (PROTO)&lt;/strong&gt; - number of distinct communication protocols&amp;nbsp;for a given source IP&lt;/li&gt; &lt;li&gt;&lt;strong&gt;# of AS numbers (AS)&lt;/strong&gt; - number of distinct destination AS numbers for a given source IP&lt;/li&gt; &lt;li&gt;&lt;strong&gt;# of countries (CTRY)&lt;/strong&gt; - number of distinct destination countries&amp;nbsp;for a given source IP&lt;/li&gt; &lt;/ul&gt; &lt;/li&gt; &lt;li&gt;&lt;strong&gt;Labels&lt;/strong&gt; &lt;ul&gt; &lt;li&gt;&lt;strong&gt;Range (RNG)&lt;/strong&gt; - a network range a host belongs to (anonymized)&lt;/li&gt; &lt;li&gt;&lt;strong&gt;Unit (UNT) &lt;/strong&gt;- an administrative unit owning the network range&lt;/li&gt; &lt;li&gt;&lt;strong&gt;Sub-unit (SUB-UNT)&lt;/strong&gt; - a sub-unit of the unit&lt;/li&gt; &lt;/ul&gt; &lt;/li&gt; &lt;/ul&gt; &lt;p&gt;&amp;nbsp;&lt;/p&gt; &lt;p&gt;&lt;em&gt;&lt;strong&gt;Dataset format&lt;/strong&gt;&lt;/em&gt;&lt;/p&gt; &lt;ul&gt; &lt;li&gt;The dataset is in &lt;strong&gt;comma-separated values (CSV)&lt;/strong&gt; format.&amp;nbsp;&lt;/li&gt; &lt;li&gt;&lt;strong&gt;Header&lt;/strong&gt; - multilevel, first 3 lines &lt;ul&gt; &lt;li&gt;1 level - aggregation type {mean|min|max}&lt;/li&gt; &lt;li&gt;2 level - variable {see above}&lt;/li&gt; &lt;li&gt;3 level - hour of a day {00,01,02,03,...,22,23}&lt;/li&gt; &lt;/ul&gt; &lt;/li&gt; &lt;li&gt;&lt;strong&gt;Labels&lt;/strong&gt; - last 4 columns&lt;/li&gt; &lt;li&gt;&lt;strong&gt;Dataset size&amp;nbsp;&lt;/strong&gt; &lt;ul&gt; &lt;li&gt;rows: 65536 host records&amp;nbsp;+ 3 headers&lt;/li&gt; &lt;li&gt;columns: 648 variables + 4 labels&lt;/li&gt; &lt;/ul&gt; &lt;/li&gt; &lt;/ul&gt; &lt;p&gt;&amp;nbsp;&lt;/p&gt;</dct:description>
    <dct:accessRights rdf:resource="http://publications.europa.eu/resource/authority/access-right/PUBLIC"/>
    <!-- Second access-rights statement: a dct:RightsStatement identified by
         info:eu-repo/semantics/openAccess and labelled "Open Access". -->
    <dct:accessRights>
      <dct:RightsStatement rdf:about="info:eu-repo/semantics/openAccess">
        <rdfs:label>Open Access</rdfs:label>
      </dct:RightsStatement>
    </dct:accessRights>
    <!-- Distribution that carries the license (Creative Commons BY 4.0
         legal code URL); its access URL is the dataset DOI. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dct:license rdf:resource="https://creativecommons.org/licenses/by/4.0/legalcode"/>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.2669079"/>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- Distribution describing the downloadable CSV file: direct download
         URL, media type text/csv and byte size. The access URL is the same
         dataset DOI used by the license-bearing distribution. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.2669079"/>
        <dcat:byteSize>158716697</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/2669079/files/host-network-traffic-time-series-2019-01-annon.csv"/>
        <dcat:mediaType>text/csv</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
  </rdf:Description>
</rdf:RDF>
380
354
views
downloads
All versions This version
Views 380 380
Downloads 354 354
Data volume 56.2 GB 56.2 GB
Unique views 335 335
Unique downloads 245 245

Share

Cite as