{ "datafile": { "entity_name": "datafile", "entity_definition": "File of a single image that was extracted from either a camera or a sonar stream. Ready to use for primary processing, e.g. machine learning.", "relations": { "modality": "many-to-one", "sample": "many-to-many", "stream": "many-to-one"}, "attributes": { "datafile_id": { "attribute_name": "datafile_id", "attribute_definition": "Primary key of the datafile table.", "sqlite_datatype": "integer", "unique": true, "nullable": false, "unit": null}, "datafile_name": { "attribute_name": "datafile_name", "attribute_definition": "Name of the datafile. Last part of the datafile path when split at '/'. The file encoding can be inferred from the filename extension, which is either '.png' or '.jpg'.", "sqlite_datatype": "text", "unique": false, "nullable": false, "unit": null}, "datafile_path": { "attribute_name": "datafile_path", "attribute_definition": "Relative file tree path at which the datafile is stored. The format is ${session_key}/${dataset_key}/${modality_key}/${datafile_name}. Natural key of the datafile table.", "sqlite_datatype": "text", "unique": true, "nullable": false, "unit": null}, "datafile_timestamp": { "attribute_name": "datafile_timestamp", "attribute_definition": "Datetime at which the data contained in the file is collected. ISO 8601 formatted with explicit local time zone information. Millisecond resolution.", "sqlite_datatype": "text", "unique": false, "nullable": false, "unit": null}, "datafile_timestamp_epoch": { "attribute_name": "datafile_timestamp_epoch", "attribute_definition": "Timestamp at which the data contained in the file is collected. Seconds since the UNIX epoch, 1970-01-01 00:00:00 UTC. 
Millisecond resolution.", "sqlite_datatype": "real", "unique": false, "nullable": false, "unit": "s"}, "modality_id": { "attribute_name": "modality_id", "attribute_definition": "Foreign key to the modality table.", "sqlite_datatype": "integer", "unique": false, "nullable": false, "unit": null}, "datafile_size": { "attribute_name": "datafile_size", "attribute_definition": "Size of the datafile in bytes.", "sqlite_datatype": "integer", "unique": false, "nullable": false, "unit": "B"}, "stream_id": { "attribute_name": "stream_id", "attribute_definition": "Foreign key to the stream table.", "sqlite_datatype": "integer", "unique": false, "nullable": false, "unit": null}}}, "dataset": { "entity_name": "dataset", "entity_definition": "At the raw data stage: Contiguous recording of one or multiple sensor (camera and/or sonar) data streams within the same time frame during a single data collection session. At the processed data stage: Set of all samples (uni- or multimodal, depending on the available streams), that are extracted from the same raw dataset.", "relations": { "sample": "one-to-many", "session": "many-to-one", "stream": "one-to-many"}, "attributes": { "dataset_comments": { "attribute_name": "dataset_comments", "attribute_definition": "Optional note, reporting unexpected events related to a dataset recording, or unusual features of a dataset.", "sqlite_datatype": "text", "unique": false, "nullable": true, "unit": null}, "dataset_description": { "attribute_name": "dataset_description", "attribute_definition": "Description of the scene that is captured by the dataset. Typically covers actions performed by the diver(s), and objects that are visible during the recording. In early datasets, the description is sometimes missing. 
The missing value is 'N/A'.", "sqlite_datatype": "text", "unique": false, "nullable": false, "unit": null}, "dataset_duration": { "attribute_name": "dataset_duration", "attribute_definition": "Time interval between start and end of the dataset, in seconds.", "sqlite_datatype": "real", "unique": false, "nullable": false, "unit": "s"}, "dataset_end": { "attribute_name": "dataset_end", "attribute_definition": "Datetime at which the dataset ends. Equal to the logfile_end of the latest logfile of the dataset. ISO 8601 formatted with explicit local time zone information. Millisecond resolution.", "sqlite_datatype": "text", "unique": false, "nullable": false, "unit": null}, "dataset_end_epoch": { "attribute_name": "dataset_end_epoch", "attribute_definition": "Timestamp at which the dataset ends. Equal to the logfile_end_epoch of the latest logfile of the dataset. Seconds since the UNIX epoch, 1970-01-01 00:00:00 UTC. Millisecond resolution.", "sqlite_datatype": "real", "unique": false, "nullable": false, "unit": "s"}, "dataset_id": { "attribute_name": "dataset_id", "attribute_definition": "Primary key of the dataset table.", "sqlite_datatype": "integer", "unique": true, "nullable": false, "unit": null}, "dataset_key": { "attribute_name": "dataset_key", "attribute_definition": "Name of a directory that contains the logfiles of a dataset, or its datafiles grouped by modality. Subdirectory of a session directory. Natural key of the dataset table. Made up of a prefix and a suffix: ${dataset_key} = ${prefix}${suffix}. The prefix represents the local start datetime of the dataset in minute resolution, following ISO 8601 as much as possible for a directory name: ${prefix} = %Y-%m-%d-%H-%M. The suffix is a normalized version of the dataset title, in lower case ASCII and with underscores instead of whitespace: ${suffix} = _[a-z0-9_-]*. 
In early datasets, the dataset title -- and therefore the suffix -- is sometimes missing.", "sqlite_datatype": "text", "unique": true, "nullable": false, "unit": null}, "dataset_start": { "attribute_name": "dataset_start", "attribute_definition": "Datetime at which the dataset starts. Equal to the logfile_start of the earliest logfile of the dataset. ISO 8601 formatted with explicit local time zone information. Millisecond resolution.", "sqlite_datatype": "text", "unique": false, "nullable": false, "unit": null}, "dataset_start_epoch": { "attribute_name": "dataset_start_epoch", "attribute_definition": "Timestamp at which the dataset starts. Equal to the logfile_start_epoch of the earliest logfile of the dataset. Seconds since the UNIX epoch, 1970-01-01 00:00:00 UTC. Millisecond resolution.", "sqlite_datatype": "real", "unique": false, "nullable": false, "unit": "s"}, "dataset_title": { "attribute_name": "dataset_title", "attribute_definition": "Core theme of the dataset, sketched in a few words, to serve as a reminder. In early datasets, the title is sometimes missing. The missing value is 'N/A'.", "sqlite_datatype": "text", "unique": false, "nullable": false, "unit": null}, "objects": { "attribute_name": "objects", "attribute_definition": "List of objects that are visible at some point in the recording. In early datasets, the list of objects is sometimes missing. The missing value is 'N/A'.", "sqlite_datatype": "text", "unique": false, "nullable": false, "unit": null}, "session_id": { "attribute_name": "session_id", "attribute_definition": "Foreign key to the session table.", "sqlite_datatype": "integer", "unique": false, "nullable": false, "unit": null}, "visibility": { "attribute_name": "visibility", "attribute_definition": "Description of the visibility conditions during the dataset recording. In early datasets, the visibility is sometimes missing. 
The missing value is 'N/A'.", "sqlite_datatype": "text", "unique": false, "nullable": false, "unit": null}}}, "location": { "entity_name": "location", "entity_definition": "Physical location at which a data collection session is conducted. Bounded contiguous region on the earth surface, represented by a single geolocation point.", "relations": { "session": "one-to-many"}, "attributes": { "address": { "attribute_name": "address", "attribute_definition": "Postal address of the location.", "sqlite_datatype": "text", "unique": false, "nullable": false, "unit": null}, "latitude": { "attribute_name": "latitude", "attribute_definition": "Latitude of the representative point.", "sqlite_datatype": "real", "unique": false, "nullable": false, "unit": "deg"}, "location_description": { "attribute_name": "location_description", "attribute_definition": "Description of the location.", "sqlite_datatype": "text", "unique": false, "nullable": false, "unit": null}, "location_id": { "attribute_name": "location_id", "attribute_definition": "Primary key of the location table.", "sqlite_datatype": "integer", "unique": true, "nullable": false, "unit": null}, "location_key": { "attribute_name": "location_key", "attribute_definition": "Textual identifier of the location, used as suffix of session keys. 
Natural key of the location table.", "sqlite_datatype": "text", "unique": true, "nullable": false, "unit": null}, "location_name_english": { "attribute_name": "location_name_english", "attribute_definition": "Name of the location in English.", "sqlite_datatype": "text", "unique": false, "nullable": false, "unit": null}, "location_name_local": { "attribute_name": "location_name_local", "attribute_definition": "Name of the location in local language (German).", "sqlite_datatype": "text", "unique": false, "nullable": false, "unit": null}, "longitude": { "attribute_name": "longitude", "attribute_definition": "Longitude of the representative point.", "sqlite_datatype": "real", "unique": false, "nullable": false, "unit": "deg"}}}, "logfile": { "entity_name": "logfile", "entity_definition": "File in which one or (potentially) many raw sensor streams of the same dataset are persisted. In the current data corpus, each stream is persisted in a separate logfile. Since this may not be the case in other corpora, a more general model is applied here.", "relations": { "stream": "one-to-many"}, "attributes": { "compression_ratio": { "attribute_name": "compression_ratio", "attribute_definition": "Ratio at which the data is compressed. ROS specific.", "sqlite_datatype": "real", "unique": false, "nullable": false, "unit": null}, "compression_type": { "attribute_name": "compression_type", "attribute_definition": "Type of compression. ROS specific. Missing (i.e. NULL) if the data is not compressed.", "sqlite_datatype": "text", "unique": false, "nullable": true, "unit": null}, "logfile_duration": { "attribute_name": "logfile_duration", "attribute_definition": "Time interval between start and end of the logfile recording, in seconds.", "sqlite_datatype": "real", "unique": false, "nullable": false, "unit": "s"}, "logfile_end": { "attribute_name": "logfile_end", "attribute_definition": "Datetime at which the logfile recording ends. 
ISO 8601 formatted with explicit local time zone information. Millisecond resolution.", "sqlite_datatype": "text", "unique": false, "nullable": false, "unit": null}, "logfile_end_epoch": { "attribute_name": "logfile_end_epoch", "attribute_definition": "Timestamp at which the logfile recording ends. Seconds since the UNIX epoch, 1970-01-01 00:00:00 UTC. Millisecond resolution.", "sqlite_datatype": "real", "unique": false, "nullable": false, "unit": "s"}, "logfile_id": { "attribute_name": "logfile_id", "attribute_definition": "Primary key of the logfile table.", "sqlite_datatype": "integer", "unique": true, "nullable": false, "unit": null}, "logfile_key": { "attribute_name": "logfile_key", "attribute_definition": "Name of the logfile. Last part of the logfile path when split at '/'. Natural key of the logfile table. Made up of a stem and an extension: ${logfile_key} = ${stem}${extension}. The extension is either '.bag' if the logfile exists, or '.dummy' if not: ${extension} = {'.bag','.dummy'}; dummy logfiles are created to collect provenance information of datafiles whose logfiles are managed outside the corpus. The stem consists of a prefix and a suffix, joined by a hyphen: ${stem} = ${prefix}-${suffix}. The prefix is a sensor key (which is possible, because the logfile always contains a single stream from a single sensor): ${prefix} = ${sensor_key}. The suffix represents the local start datetime of the logfile in second resolution, following ISO 8601 as much as possible for a file name: ${suffix} = %Y-%m-%d-%H-%M-%S.", "sqlite_datatype": "text", "unique": true, "nullable": false, "unit": null}, "logfile_path": { "attribute_name": "logfile_path", "attribute_definition": "Relative file tree path at which the logfile is stored. The format is ${session_key}/${dataset_key}/${modality_key}/${logfile_key}. 
Another natural key of the logfile table.", "sqlite_datatype": "text", "unique": true, "nullable": false, "unit": null}, "logfile_start": { "attribute_name": "logfile_start", "attribute_definition": "Datetime at which the logfile recording starts. ISO 8601 formatted with explicit local time zone information. Millisecond resolution.", "sqlite_datatype": "text", "unique": false, "nullable": false, "unit": null}, "logfile_start_epoch": { "attribute_name": "logfile_start_epoch", "attribute_definition": "Timestamp at which the logfile recording starts. Seconds since the UNIX epoch, 1970-01-01 00:00:00 UTC. Millisecond resolution.", "sqlite_datatype": "real", "unique": false, "nullable": false, "unit": "s"}, "md5sum": { "attribute_name": "md5sum", "attribute_definition": "MD5 checksum of the logfile. Empty string (not NULL) when the logfile is a dummy.", "sqlite_datatype": "text", "unique": false, "nullable": false, "unit": null}, "logfile_size": { "attribute_name": "logfile_size", "attribute_definition": "Size of the logfile in bytes.", "sqlite_datatype": "integer", "unique": false, "nullable": false, "unit": "B"}}}, "modality": { "entity_name": "modality", "entity_definition": "Distinct type or channel of information, such as text, image, or audio, persisted with a certain type of datafile. Not the same as a sensor type, although often associated with a sensor type or types: A particular sensor (type) may be the source of multiple modalities; a particular modality may originate from multiple sensors. 
Due to this many-to-many relation, modality is modeled as a property of a datafile, not of a sensor.", "relations": { "datafile": "one-to-many"}, "attributes": { "modality_description": { "attribute_name": "modality_description", "attribute_definition": "Description of the modality.", "sqlite_datatype": "text", "unique": true, "nullable": false, "unit": null}, "modality_id": { "attribute_name": "modality_id", "attribute_definition": "Primary key of the modality table.", "sqlite_datatype": "integer", "unique": true, "nullable": false, "unit": null}, "modality_key": { "attribute_name": "modality_key", "attribute_definition": "Name of a directory that contains the datafiles of this modality in a given dataset. Subdirectory of a dataset directory. Natural key of the modality table.", "sqlite_datatype": "text", "unique": true, "nullable": false, "unit": null}, "modality_name": { "attribute_name": "modality_name", "attribute_definition": "Name of the modality for human consumption, i.e. not subject to the restricted character set valid for keys ([a-z0-9_-]). Another natural key of the modality table.", "sqlite_datatype": "text", "unique": true, "nullable": false, "unit": null}}}, "sample": { "entity_name": "sample", "entity_definition": "Set of datafiles from the same dataset that are captured at approximately the same time. Typically, a sample consists of a camera image and a sonar image. In the data file tree, a list of all samples of a dataset is contained in a file named samples.json. 
This information is replicated in the metadatabase with the matching table datafile__sample, covering all datasets.", "relations": { "datafile": "many-to-many", "dataset": "many-to-one"}, "attributes": { "dataset_id": { "attribute_name": "dataset_id", "attribute_definition": "Foreign key to the dataset table.", "sqlite_datatype": "integer", "unique": false, "nullable": false, "unit": null}, "sample_id": { "attribute_name": "sample_id", "attribute_definition": "Primary key of the sample table.", "sqlite_datatype": "integer", "unique": true, "nullable": false, "unit": null}, "samples_json_index": { "attribute_name": "samples_json_index", "attribute_definition": "Index (zero based) of the sample in the samples.json file for the respective dataset. The pair (samples_json_path, samples_json_index) is a natural key of the sample table.", "sqlite_datatype": "integer", "unique": false, "nullable": false, "unit": null}, "samples_json_path": { "attribute_name": "samples_json_path", "attribute_definition": "Relative file tree path of the respective samples.json file. The format is ${session_key}/${dataset_key}/samples.json. The pair (samples_json_path, samples_json_index) is a natural key of the sample table.", "sqlite_datatype": "text", "unique": false, "nullable": false, "unit": null}}}, "sensor": { "entity_name": "sensor", "entity_definition": "Technical device that produces signals in response to certain physical stimuli. 
Specifically, the corpus contains data from different camera and sonar sensors.", "relations": { "stream": "one-to-many"}, "attributes": { "sensor_description": { "attribute_name": "sensor_description", "attribute_definition": "Description of the sensor.", "sqlite_datatype": "text", "unique": false, "nullable": false, "unit": null}, "sensor_id": { "attribute_name": "sensor_id", "attribute_definition": "Primary key of the sensor table.", "sqlite_datatype": "integer", "unique": true, "nullable": false, "unit": null}, "sensor_key": { "attribute_name": "sensor_key", "attribute_definition": "Textual identifier of the sensor, used as prefix of logfile keys. Composed of the sensor type and the sensor model: ${sensor_key} = ${sensor_type}-${sensor_model}. Natural key of the sensor table.", "sqlite_datatype": "text", "unique": true, "nullable": false, "unit": null}, "sensor_model": { "attribute_name": "sensor_model", "attribute_definition": "Textual identifier of the sensor model. Unique for a given sensor type. Used as suffix of the sensor key.", "sqlite_datatype": "text", "unique": false, "nullable": false, "unit": null}, "sensor_name": { "attribute_name": "sensor_name", "attribute_definition": "Name of the sensor for human consumption, e.g. by manufacturers and retailers. Another natural key of the sensor table.", "sqlite_datatype": "text", "unique": false, "nullable": false, "unit": null}, "sensor_specification": { "attribute_name": "sensor_specification", "attribute_definition": "Relevant attributes from the sensor datasheet.", "sqlite_datatype": "text", "unique": false, "nullable": false, "unit": null}, "sensor_type": { "attribute_name": "sensor_type", "attribute_definition": "Textual identifier of the sensor type. 
Used as prefix of the sensor key.", "sqlite_datatype": "text", "unique": false, "nullable": false, "unit": null}}}, "session": { "entity_name": "session", "entity_definition": "Data acquisition effort at a single location within a bounded time frame under specific conditions. Conditions include available resources (personnel, material) and relevant environment conditions. In the data corpus, each session is associated with the set of datasets that are collected during the session. Therefore, session colloquially also refers to such a set of datasets.", "relations": { "dataset": "one-to-many", "location": "many-to-one"}, "attributes": { "location_id": { "attribute_name": "location_id", "attribute_definition": "Foreign key to the location table.", "sqlite_datatype": "integer", "unique": false, "nullable": false, "unit": null}, "session_comments": { "attribute_name": "session_comments", "attribute_definition": "Optional note, reporting unexpected events related to a session, or unusual features of a session.", "sqlite_datatype": "text", "unique": false, "nullable": true, "unit": null}, "session_description": { "attribute_name": "session_description", "attribute_definition": "Description of the session, covering the location, available resources, and relevant environment conditions in general terms.", "sqlite_datatype": "text", "unique": false, "nullable": false, "unit": null}, "session_duration": { "attribute_name": "session_duration", "attribute_definition": "Time interval between the start and end of the session, in seconds.", "sqlite_datatype": "real", "unique": false, "nullable": false, "unit": "s"}, "session_end": { "attribute_name": "session_end", "attribute_definition": "Datetime at which the session ends. Equal to the dataset_end of the latest dataset of the session. ISO 8601 formatted with explicit local time zone information. 
Millisecond resolution.", "sqlite_datatype": "text", "unique": false, "nullable": false, "unit": null}, "session_end_epoch": { "attribute_name": "session_end_epoch", "attribute_definition": "Timestamp at which the session ends. Equal to the dataset_end_epoch of the latest dataset of the session. Seconds since the UNIX epoch, 1970-01-01 00:00:00 UTC. Millisecond resolution.", "sqlite_datatype": "real", "unique": false, "nullable": false, "unit": "s"}, "session_id": { "attribute_name": "session_id", "attribute_definition": "Primary key of the session table.", "sqlite_datatype": "integer", "unique": true, "nullable": false, "unit": null}, "session_key": { "attribute_name": "session_key", "attribute_definition": "Name of a directory that contains all dataset directories of a session. Natural key of the session table. Made up of a prefix and a suffix, joined by an underscore: ${session_key} = ${prefix}_${suffix}. The prefix represents the local start date of the session in month resolution, following ISO 8601: ${prefix} = %Y-%m. The suffix is the location key associated with the session: ${suffix} = ${location_key}.", "sqlite_datatype": "text", "unique": true, "nullable": false, "unit": null}, "session_start": { "attribute_name": "session_start", "attribute_definition": "Datetime at which the session starts. Equal to the dataset_start of the earliest dataset of the session. ISO 8601 formatted with explicit local time zone information. Millisecond resolution.", "sqlite_datatype": "text", "unique": false, "nullable": false, "unit": null}, "session_start_epoch": { "attribute_name": "session_start_epoch", "attribute_definition": "Timestamp at which the session starts. Equal to the dataset_start_epoch of the earliest dataset of the session. Seconds since the UNIX epoch, 1970-01-01 00:00:00 UTC. 
Millisecond resolution.", "sqlite_datatype": "real", "unique": false, "nullable": false, "unit": "s"}}}, "stream": { "entity_name": "stream", "entity_definition": "Timeseries of signals from a particular sensor, generated as part of a particular dataset, persisted in a logfile. Source for the extraction of datafiles. Streams in the data corpus are recorded using the ROS middleware (https://www.ros.org), therefore each stream consists of ROS messages that belong to certain ROS types and carry certain ROS topics. In the current data corpus, each stream has a dedicated logfile. Since other corpora may store multiple streams in a single logfile, a more general model is applied here.", "relations": { "datafile": "one-to-many", "dataset": "many-to-one", "logfile": "many-to-one", "sensor": "many-to-one"}, "attributes": { "dataset_id": { "attribute_name": "dataset_id", "attribute_definition": "Foreign key to the dataset table.", "sqlite_datatype": "integer", "unique": false, "nullable": false, "unit": null}, "logfile_id": { "attribute_name": "logfile_id", "attribute_definition": "Foreign key to the logfile table.", "sqlite_datatype": "integer", "unique": false, "nullable": false, "unit": null}, "num_ros_messages": { "attribute_name": "num_ros_messages", "attribute_definition": "Number of ROS messages contained in the stream.", "sqlite_datatype": "integer", "unique": false, "nullable": false, "unit": null}, "sensor_configuration": { "attribute_name": "sensor_configuration", "attribute_definition": "Description of the sensor configuration during the recording. 
Only available for the Blueprint Oculus M1200d sonar, otherwise NULL.", "sqlite_datatype": "text", "unique": false, "nullable": true, "unit": null}, "sensor_id": { "attribute_name": "sensor_id", "attribute_definition": "Foreign key to the sensor table.", "sqlite_datatype": "integer", "unique": false, "nullable": false, "unit": null}, "stream_duration": { "attribute_name": "stream_duration", "attribute_definition": "Time interval between start and end of the stream, in seconds.", "sqlite_datatype": "real", "unique": false, "nullable": false, "unit": "s"}, "stream_end": { "attribute_name": "stream_end", "attribute_definition": "Datetime at which the stream ends. Equal to the logfile_end of the stream's logfile. ISO 8601 formatted with explicit local time zone information. Millisecond resolution.", "sqlite_datatype": "text", "unique": false, "nullable": false, "unit": null}, "stream_end_epoch": { "attribute_name": "stream_end_epoch", "attribute_definition": "Timestamp at which the stream ends. Equal to the logfile_end_epoch of the stream's logfile. Seconds since the UNIX epoch, 1970-01-01 00:00:00 UTC. Millisecond resolution.", "sqlite_datatype": "real", "unique": false, "nullable": false, "unit": "s"}, "stream_id": { "attribute_name": "stream_id", "attribute_definition": "Primary key of the stream table.", "sqlite_datatype": "integer", "unique": true, "nullable": false, "unit": null}, "stream_key": { "attribute_name": "stream_key", "attribute_definition": "Textual identifier of the stream. Equal to the stream's logfile_key without the filename extension. Natural key of the stream table.", "sqlite_datatype": "text", "unique": true, "nullable": false, "unit": null}, "stream_start": { "attribute_name": "stream_start", "attribute_definition": "Datetime at which the stream starts. Equal to the logfile_start of the stream's logfile. ISO 8601 formatted with explicit local time zone information. 
Millisecond resolution.", "sqlite_datatype": "text", "unique": false, "nullable": false, "unit": null}, "stream_start_epoch": { "attribute_name": "stream_start_epoch", "attribute_definition": "Timestamp at which the stream starts. Equal to the logfile_start_epoch of the stream's logfile. Seconds since the UNIX epoch, 1970-01-01 00:00:00 UTC. Millisecond resolution.", "sqlite_datatype": "real", "unique": false, "nullable": false, "unit": "s"}, "ros_topics": { "attribute_name": "ros_topics", "attribute_definition": "ROS topics contained in the stream.", "sqlite_datatype": "text", "unique": false, "nullable": false, "unit": null}, "ros_types": { "attribute_name": "ros_types", "attribute_definition": "ROS types contained in the stream.", "sqlite_datatype": "text", "unique": false, "nullable": false, "unit": null}}}}