bench_executor.virtuoso

Virtuoso is a secure and high-performance platform for modern data access, integration, virtualization, and multi-model data management (tables & graphs) based on innovative support of existing open standards (e.g., SQL, SPARQL, and GraphQL).

Website: https://virtuoso.openlinksw.com/
Repository: https://github.com/openlink/virtuoso-opensource

  1#!/usr/bin/env python3
  2
  3"""
  4Virtuoso is a secure and high-performance platform for modern data access,
  5integration, virtualization, and multi-model data management (tables & graphs)
  6based on innovative support of existing open standards
  7(e.g., SQL, SPARQL, and GraphQL).
  8
  9**Website**: https://virtuoso.openlinksw.com/<br>
 10**Repository**: https://github.com/openlink/virtuoso-opensource
 11"""
 12
 13import os
 14import tempfile
 15import psutil
 16from typing import Dict
 17from threading import Thread
 18from bench_executor.container import Container
 19from bench_executor.logger import Logger
 20
VERSION = '7.2.7'  # Virtuoso Docker image tag (kgconstruct/virtuoso:v{VERSION})
MAX_ROWS = '10000000'  # SPARQL ResultSetMaxRows limit
QUERY_TIMEOUT = '0'  # no limit
MAX_VECTOR_SIZE = '3000000'  # max value is 'around' 3,500,000 from docs
PASSWORD = 'root'  # DBA password passed to the container via DBA_PASSWORD
NUMBER_OF_BUFFERS_PER_GB = 85000  # NumberOfBuffers setting, scaled per GB of RAM
MAX_DIRTY_BUFFERS_PER_GB = 65000  # MaxDirtyBuffers setting, scaled per GB of RAM
 28
 29
 30def _spawn_loader(container):
 31    """Thread function to parallel load RDF.
 32
 33    Parameters
 34    ----------
 35    container : Container
 36        The Virtuoso container on which the RDF loader should run.
 37    """
 38    success, logs = container.exec('\'isql\' -U dba -P root '
 39                                   'exec="rdf_loader_run();"')
 40
 41
 42class Virtuoso(Container):
 43    """Virtuoso container to execute SPARQL queries"""
 44
 45    def __init__(self, data_path: str, config_path: str, directory: str,
 46                 verbose: bool):
 47        """Creates an instance of the Virtuoso class.
 48
 49        Parameters
 50        ----------
 51        data_path : str
 52            Path to the data directory of the case.
 53        config_path : str
 54            Path to the config directory of the case.
 55        directory : str
 56            Path to the directory to store logs.
 57        verbose : bool
 58            Enable verbose logs.
 59        """
 60        self._data_path = os.path.abspath(data_path)
 61        self._config_path = os.path.abspath(config_path)
 62        self._logger = Logger(__name__, directory, verbose)
 63
 64        tmp_dir = os.path.join(tempfile.gettempdir(), 'virtuoso')
 65        os.umask(0)
 66        os.makedirs(tmp_dir, exist_ok=True)
 67        os.makedirs(os.path.join(self._data_path, 'virtuoso'), exist_ok=True)
 68        number_of_buffers = int(psutil.virtual_memory().total / (10**9)
 69                                * NUMBER_OF_BUFFERS_PER_GB)
 70        max_dirty_buffers = int(psutil.virtual_memory().total / (10**9)
 71                                * MAX_DIRTY_BUFFERS_PER_GB)
 72        environment = {'DBA_PASSWORD': PASSWORD,
 73                       'VIRT_SPARQL_ResultSetMaxRows': MAX_ROWS,
 74                       'VIRT_SPARQL_MaxQueryExecutionTime': QUERY_TIMEOUT,
 75                       'VIRT_SPARQL_ExecutionTimeout': QUERY_TIMEOUT,
 76                       'VIRT_SPARQL_MaxQueryCostEstimationTime': QUERY_TIMEOUT,
 77                       'VIRT_Parameters_MaxVectorSize': MAX_VECTOR_SIZE,
 78                       'VIRT_Parameters_NumberOfBuffers': number_of_buffers,
 79                       'VIRT_Parameters_MaxDirtyBuffers': max_dirty_buffers}
 80        super().__init__(f'kgconstruct/virtuoso:v{VERSION}',
 81                         'Virtuoso', self._logger,
 82                         ports={'8890': '8890', '1111': '1111'},
 83                         environment=environment,
 84                         volumes=[f'{self._data_path}/shared:/usr/share/proj',
 85                                  f'{tmp_dir}:/database'])
 86        self._endpoint = 'http://localhost:8890/sparql'
 87
 88    def initialization(self) -> bool:
 89        """Initialize Virtuoso's database.
 90
 91        Returns
 92        -------
 93        success : bool
 94            Whether the initialization was successfull or not.
 95        """
 96        # Virtuoso should start with a initialized database, start Virtuoso
 97        # if not initialized to avoid the pre-run start during benchmark
 98        # execution
 99        success = self.wait_until_ready()
100        if not success:
101            return False
102        success = self.stop()
103
104        return success
105
106    @property
107    def root_mount_directory(self) -> str:
108        """Subdirectory in the root directory of the case for Virtuoso.
109
110        Returns
111        -------
112        subdirectory : str
113            Subdirectory of the root directory for Virtuoso.
114        """
115        return __name__.lower()
116
117    def wait_until_ready(self, command: str = '') -> bool:
118        """Wait until Virtuoso is ready to execute SPARQL queries.
119
120        Parameters
121        ----------
122        command : str
123            Command to execute in the Virtuoso container, optionally, defaults
124            to no command.
125
126        Returns
127        -------
128        success : bool
129            Whether the Virtuoso was initialized successfull or not.
130        """
131        return self.run_and_wait_for_log('Server online at', command=command)
132
133    def load(self, rdf_file: str, rdf_dir: str = '') -> bool:
134        """Load an RDF file into Virtuoso.
135
136        Currently, only N-Triples files are supported.
137
138        Parameters
139        ----------
140        rdf_file : str
141            Name of the RDF file to load.
142        rdf_dir : str
143            Name of the directory where RDF file(s) are stored.
144            Default root of the data directory.
145
146        Returns
147        -------
148        success : bool
149            Whether the loading was successfull or not.
150        """
151        return self.load_parallel(rdf_file, 1, rdf_dir)
152
153    def load_parallel(self, rdf_file: str, cores: int,
154                      rdf_dir: str = '') -> bool:
155        """Load an RDF file into Virtuoso in parallel.
156
157        Currently, only N-Triples files are supported.
158
159        Parameters
160        ----------
161        rdf_file : str
162            Name of the RDF file to load.
163        cores : int
164            Number of CPU cores for loading.
165        rdf_dir : str
166            Name of the directory where RDF file(s) are stored.
167            Default root of the data directory.
168
169        Returns
170        -------
171        success : bool
172            Whether the loading was successfull or not.
173        """
174        success = True
175
176        success, logs = self.exec(f'sh -c "ls /usr/share/proj/{rdf_file}"')
177        for line in logs:
178            self._logger.debug(line)
179        if not success:
180            self._logger.error('RDF files do not exist for loading')
181            return False
182
183        # Load directory with data
184        directory = f'/usr/share/proj/{rdf_dir}'
185        success, logs = self.exec('\'isql\' -U dba -P root '
186                                  f'exec="ld_dir(\'{directory}\','
187                                  f'\'{rdf_file}\', '
188                                  '\'http://example.com/graph\');"')
189        for line in logs:
190            self._logger.debug(line)
191        if not success:
192            self._logger.error('ISQL loader query failure')
193            return False
194
195        loader_threads = []
196        self._logger.debug(f'Spawning {cores} loader threads')
197        for i in range(cores):
198            t = Thread(target=_spawn_loader, args=(self,), daemon=True)
199            t.start()
200            loader_threads.append(t)
201
202        for t in loader_threads:
203            t.join()
204        self._logger.debug(f'Loading finished with {cores} threads')
205
206        # Re-enable checkpoints and scheduler which are disabled automatically
207        # after loading RDF with rdf_loader_run()
208        success, logs = self.exec('\'isql\' -U dba -P root exec="checkpoint;"')
209        for line in logs:
210            self._logger.debug(line)
211        if not success:
212            self._logger.error('ISQL re-enable checkpoints query failure')
213            return False
214
215        success, logs = self.exec('\'isql\' -U dba -P root '
216                                  'exec="checkpoint_interval(60);"')
217        for line in logs:
218            self._logger.debug(line)
219        if not success:
220            self._logger.error('ISQL checkpoint interval query failure')
221            return False
222
223        success, logs = self.exec('\'isql\' -U dba -P root '
224                                  'exec="scheduler_interval(10);"')
225        for line in logs:
226            self._logger.debug(line)
227        if not success:
228            self._logger.error('ISQL scheduler interval query failure')
229            return False
230
231        return success
232
233    def stop(self) -> bool:
234        """Stop Virtuoso.
235
236        Drops all triples in Virtuoso before stopping its container.
237
238        Returns
239        -------
240        success : bool
241            Whether stopping Virtuoso was successfull or not.
242        """
243        # Drop loaded triples
244        success, logs = self.exec('\'isql\' -U dba -P root '
245                                  'exec="delete from DB.DBA.load_list;"')
246        for line in logs:
247            self._logger.debug(line)
248        if not success:
249            self._logger.error('ISQL delete load list query failure')
250            return False
251
252        success, logs = self.exec('\'isql\' -U dba -P root '
253                                  'exec="rdf_global_reset();"')
254        for line in logs:
255            self._logger.debug(line)
256        if not success:
257            self._logger.error('ISQL RDF global reset query failure')
258            return False
259        return super().stop()
260
261    @property
262    def endpoint(self) -> str:
263        """SPARQL endpoint URL"""
264        return self._endpoint
265
266    @property
267    def headers(self) -> Dict[str, Dict[str, str]]:
268        """HTTP headers of SPARQL queries for serialization formats.
269
270        Only supported serialization formats are included in the dictionary.
271        Currently, the following formats are supported:
272        - N-Triples
273        - Turtle
274        - CSV
275        - RDF/JSON
276        - RDF/XML
277        - JSON-LD
278
279        Returns
280        -------
281        headers : dict
282            Dictionary of headers to use for each serialization format.
283        """
284        headers = {}
285        headers['ntriples'] = {'Accept': 'text/ntriples'}
286        headers['turtle'] = {'Accept': 'text/turtle'}
287        headers['rdfxml'] = {'Accept': 'application/rdf+xml'}
288        headers['rdfjson'] = {'Accept': 'application/rdf+json'}
289        headers['csv'] = {'Accept': 'text/csv'}
290        headers['jsonld'] = {'Accept': 'application/ld+json'}
291        return headers
292
293
if __name__ == '__main__':
    # Manual smoke test: boot a Virtuoso container, wait for it to come
    # online, then keep it running until the user presses a key.
    print(f'ℹ️  Starting up Virtuoso v{VERSION}...')
    virtuoso = Virtuoso('data', 'config', 'log', True)
    virtuoso.wait_until_ready()
    input('ℹ️  Press any key to stop')
    virtuoso.stop()
    print('ℹ️  Stopped')
VERSION = '7.2.7'
MAX_ROWS = '10000000'
QUERY_TIMEOUT = '0'
MAX_VECTOR_SIZE = '3000000'
PASSWORD = 'root'
NUMBER_OF_BUFFERS_PER_GB = 85000
MAX_DIRTY_BUFFERS_PER_GB = 65000
class Virtuoso(bench_executor.container.Container):
 43class Virtuoso(Container):
 44    """Virtuoso container to execute SPARQL queries"""
 45
 46    def __init__(self, data_path: str, config_path: str, directory: str,
 47                 verbose: bool):
 48        """Creates an instance of the Virtuoso class.
 49
 50        Parameters
 51        ----------
 52        data_path : str
 53            Path to the data directory of the case.
 54        config_path : str
 55            Path to the config directory of the case.
 56        directory : str
 57            Path to the directory to store logs.
 58        verbose : bool
 59            Enable verbose logs.
 60        """
 61        self._data_path = os.path.abspath(data_path)
 62        self._config_path = os.path.abspath(config_path)
 63        self._logger = Logger(__name__, directory, verbose)
 64
 65        tmp_dir = os.path.join(tempfile.gettempdir(), 'virtuoso')
 66        os.umask(0)
 67        os.makedirs(tmp_dir, exist_ok=True)
 68        os.makedirs(os.path.join(self._data_path, 'virtuoso'), exist_ok=True)
 69        number_of_buffers = int(psutil.virtual_memory().total / (10**9)
 70                                * NUMBER_OF_BUFFERS_PER_GB)
 71        max_dirty_buffers = int(psutil.virtual_memory().total / (10**9)
 72                                * MAX_DIRTY_BUFFERS_PER_GB)
 73        environment = {'DBA_PASSWORD': PASSWORD,
 74                       'VIRT_SPARQL_ResultSetMaxRows': MAX_ROWS,
 75                       'VIRT_SPARQL_MaxQueryExecutionTime': QUERY_TIMEOUT,
 76                       'VIRT_SPARQL_ExecutionTimeout': QUERY_TIMEOUT,
 77                       'VIRT_SPARQL_MaxQueryCostEstimationTime': QUERY_TIMEOUT,
 78                       'VIRT_Parameters_MaxVectorSize': MAX_VECTOR_SIZE,
 79                       'VIRT_Parameters_NumberOfBuffers': number_of_buffers,
 80                       'VIRT_Parameters_MaxDirtyBuffers': max_dirty_buffers}
 81        super().__init__(f'kgconstruct/virtuoso:v{VERSION}',
 82                         'Virtuoso', self._logger,
 83                         ports={'8890': '8890', '1111': '1111'},
 84                         environment=environment,
 85                         volumes=[f'{self._data_path}/shared:/usr/share/proj',
 86                                  f'{tmp_dir}:/database'])
 87        self._endpoint = 'http://localhost:8890/sparql'
 88
 89    def initialization(self) -> bool:
 90        """Initialize Virtuoso's database.
 91
 92        Returns
 93        -------
 94        success : bool
 95            Whether the initialization was successfull or not.
 96        """
 97        # Virtuoso should start with a initialized database, start Virtuoso
 98        # if not initialized to avoid the pre-run start during benchmark
 99        # execution
100        success = self.wait_until_ready()
101        if not success:
102            return False
103        success = self.stop()
104
105        return success
106
107    @property
108    def root_mount_directory(self) -> str:
109        """Subdirectory in the root directory of the case for Virtuoso.
110
111        Returns
112        -------
113        subdirectory : str
114            Subdirectory of the root directory for Virtuoso.
115        """
116        return __name__.lower()
117
118    def wait_until_ready(self, command: str = '') -> bool:
119        """Wait until Virtuoso is ready to execute SPARQL queries.
120
121        Parameters
122        ----------
123        command : str
124            Command to execute in the Virtuoso container, optionally, defaults
125            to no command.
126
127        Returns
128        -------
129        success : bool
130            Whether the Virtuoso was initialized successfull or not.
131        """
132        return self.run_and_wait_for_log('Server online at', command=command)
133
134    def load(self, rdf_file: str, rdf_dir: str = '') -> bool:
135        """Load an RDF file into Virtuoso.
136
137        Currently, only N-Triples files are supported.
138
139        Parameters
140        ----------
141        rdf_file : str
142            Name of the RDF file to load.
143        rdf_dir : str
144            Name of the directory where RDF file(s) are stored.
145            Default root of the data directory.
146
147        Returns
148        -------
149        success : bool
150            Whether the loading was successfull or not.
151        """
152        return self.load_parallel(rdf_file, 1, rdf_dir)
153
154    def load_parallel(self, rdf_file: str, cores: int,
155                      rdf_dir: str = '') -> bool:
156        """Load an RDF file into Virtuoso in parallel.
157
158        Currently, only N-Triples files are supported.
159
160        Parameters
161        ----------
162        rdf_file : str
163            Name of the RDF file to load.
164        cores : int
165            Number of CPU cores for loading.
166        rdf_dir : str
167            Name of the directory where RDF file(s) are stored.
168            Default root of the data directory.
169
170        Returns
171        -------
172        success : bool
173            Whether the loading was successfull or not.
174        """
175        success = True
176
177        success, logs = self.exec(f'sh -c "ls /usr/share/proj/{rdf_file}"')
178        for line in logs:
179            self._logger.debug(line)
180        if not success:
181            self._logger.error('RDF files do not exist for loading')
182            return False
183
184        # Load directory with data
185        directory = f'/usr/share/proj/{rdf_dir}'
186        success, logs = self.exec('\'isql\' -U dba -P root '
187                                  f'exec="ld_dir(\'{directory}\','
188                                  f'\'{rdf_file}\', '
189                                  '\'http://example.com/graph\');"')
190        for line in logs:
191            self._logger.debug(line)
192        if not success:
193            self._logger.error('ISQL loader query failure')
194            return False
195
196        loader_threads = []
197        self._logger.debug(f'Spawning {cores} loader threads')
198        for i in range(cores):
199            t = Thread(target=_spawn_loader, args=(self,), daemon=True)
200            t.start()
201            loader_threads.append(t)
202
203        for t in loader_threads:
204            t.join()
205        self._logger.debug(f'Loading finished with {cores} threads')
206
207        # Re-enable checkpoints and scheduler which are disabled automatically
208        # after loading RDF with rdf_loader_run()
209        success, logs = self.exec('\'isql\' -U dba -P root exec="checkpoint;"')
210        for line in logs:
211            self._logger.debug(line)
212        if not success:
213            self._logger.error('ISQL re-enable checkpoints query failure')
214            return False
215
216        success, logs = self.exec('\'isql\' -U dba -P root '
217                                  'exec="checkpoint_interval(60);"')
218        for line in logs:
219            self._logger.debug(line)
220        if not success:
221            self._logger.error('ISQL checkpoint interval query failure')
222            return False
223
224        success, logs = self.exec('\'isql\' -U dba -P root '
225                                  'exec="scheduler_interval(10);"')
226        for line in logs:
227            self._logger.debug(line)
228        if not success:
229            self._logger.error('ISQL scheduler interval query failure')
230            return False
231
232        return success
233
234    def stop(self) -> bool:
235        """Stop Virtuoso.
236
237        Drops all triples in Virtuoso before stopping its container.
238
239        Returns
240        -------
241        success : bool
242            Whether stopping Virtuoso was successfull or not.
243        """
244        # Drop loaded triples
245        success, logs = self.exec('\'isql\' -U dba -P root '
246                                  'exec="delete from DB.DBA.load_list;"')
247        for line in logs:
248            self._logger.debug(line)
249        if not success:
250            self._logger.error('ISQL delete load list query failure')
251            return False
252
253        success, logs = self.exec('\'isql\' -U dba -P root '
254                                  'exec="rdf_global_reset();"')
255        for line in logs:
256            self._logger.debug(line)
257        if not success:
258            self._logger.error('ISQL RDF global reset query failure')
259            return False
260        return super().stop()
261
262    @property
263    def endpoint(self) -> str:
264        """SPARQL endpoint URL"""
265        return self._endpoint
266
267    @property
268    def headers(self) -> Dict[str, Dict[str, str]]:
269        """HTTP headers of SPARQL queries for serialization formats.
270
271        Only supported serialization formats are included in the dictionary.
272        Currently, the following formats are supported:
273        - N-Triples
274        - Turtle
275        - CSV
276        - RDF/JSON
277        - RDF/XML
278        - JSON-LD
279
280        Returns
281        -------
282        headers : dict
283            Dictionary of headers to use for each serialization format.
284        """
285        headers = {}
286        headers['ntriples'] = {'Accept': 'text/ntriples'}
287        headers['turtle'] = {'Accept': 'text/turtle'}
288        headers['rdfxml'] = {'Accept': 'application/rdf+xml'}
289        headers['rdfjson'] = {'Accept': 'application/rdf+json'}
290        headers['csv'] = {'Accept': 'text/csv'}
291        headers['jsonld'] = {'Accept': 'application/ld+json'}
292        return headers

Virtuoso container to execute SPARQL queries

Virtuoso(data_path: str, config_path: str, directory: str, verbose: bool)
46    def __init__(self, data_path: str, config_path: str, directory: str,
47                 verbose: bool):
48        """Creates an instance of the Virtuoso class.
49
50        Parameters
51        ----------
52        data_path : str
53            Path to the data directory of the case.
54        config_path : str
55            Path to the config directory of the case.
56        directory : str
57            Path to the directory to store logs.
58        verbose : bool
59            Enable verbose logs.
60        """
61        self._data_path = os.path.abspath(data_path)
62        self._config_path = os.path.abspath(config_path)
63        self._logger = Logger(__name__, directory, verbose)
64
65        tmp_dir = os.path.join(tempfile.gettempdir(), 'virtuoso')
66        os.umask(0)
67        os.makedirs(tmp_dir, exist_ok=True)
68        os.makedirs(os.path.join(self._data_path, 'virtuoso'), exist_ok=True)
69        number_of_buffers = int(psutil.virtual_memory().total / (10**9)
70                                * NUMBER_OF_BUFFERS_PER_GB)
71        max_dirty_buffers = int(psutil.virtual_memory().total / (10**9)
72                                * MAX_DIRTY_BUFFERS_PER_GB)
73        environment = {'DBA_PASSWORD': PASSWORD,
74                       'VIRT_SPARQL_ResultSetMaxRows': MAX_ROWS,
75                       'VIRT_SPARQL_MaxQueryExecutionTime': QUERY_TIMEOUT,
76                       'VIRT_SPARQL_ExecutionTimeout': QUERY_TIMEOUT,
77                       'VIRT_SPARQL_MaxQueryCostEstimationTime': QUERY_TIMEOUT,
78                       'VIRT_Parameters_MaxVectorSize': MAX_VECTOR_SIZE,
79                       'VIRT_Parameters_NumberOfBuffers': number_of_buffers,
80                       'VIRT_Parameters_MaxDirtyBuffers': max_dirty_buffers}
81        super().__init__(f'kgconstruct/virtuoso:v{VERSION}',
82                         'Virtuoso', self._logger,
83                         ports={'8890': '8890', '1111': '1111'},
84                         environment=environment,
85                         volumes=[f'{self._data_path}/shared:/usr/share/proj',
86                                  f'{tmp_dir}:/database'])
87        self._endpoint = 'http://localhost:8890/sparql'

Creates an instance of the Virtuoso class.

Parameters
  • data_path (str): Path to the data directory of the case.
  • config_path (str): Path to the config directory of the case.
  • directory (str): Path to the directory to store logs.
  • verbose (bool): Enable verbose logs.
def initialization(self) -> bool:
 89    def initialization(self) -> bool:
 90        """Initialize Virtuoso's database.
 91
 92        Returns
 93        -------
 94        success : bool
 95            Whether the initialization was successfull or not.
 96        """
 97        # Virtuoso should start with a initialized database, start Virtuoso
 98        # if not initialized to avoid the pre-run start during benchmark
 99        # execution
100        success = self.wait_until_ready()
101        if not success:
102            return False
103        success = self.stop()
104
105        return success

Initialize Virtuoso's database.

Returns
  • success (bool): Whether the initialization was successful or not.
root_mount_directory: str
107    @property
108    def root_mount_directory(self) -> str:
109        """Subdirectory in the root directory of the case for Virtuoso.
110
111        Returns
112        -------
113        subdirectory : str
114            Subdirectory of the root directory for Virtuoso.
115        """
116        return __name__.lower()

Subdirectory in the root directory of the case for Virtuoso.

Returns
  • subdirectory (str): Subdirectory of the root directory for Virtuoso.
def wait_until_ready(self, command: str = '') -> bool:
118    def wait_until_ready(self, command: str = '') -> bool:
119        """Wait until Virtuoso is ready to execute SPARQL queries.
120
121        Parameters
122        ----------
123        command : str
124            Command to execute in the Virtuoso container, optionally, defaults
125            to no command.
126
127        Returns
128        -------
129        success : bool
130            Whether the Virtuoso was initialized successfull or not.
131        """
132        return self.run_and_wait_for_log('Server online at', command=command)

Wait until Virtuoso is ready to execute SPARQL queries.

Parameters
  • command (str): Command to execute in the Virtuoso container, optionally, defaults to no command.
Returns
  • success (bool): Whether the Virtuoso was initialized successfully or not.
def load(self, rdf_file: str, rdf_dir: str = '') -> bool:
134    def load(self, rdf_file: str, rdf_dir: str = '') -> bool:
135        """Load an RDF file into Virtuoso.
136
137        Currently, only N-Triples files are supported.
138
139        Parameters
140        ----------
141        rdf_file : str
142            Name of the RDF file to load.
143        rdf_dir : str
144            Name of the directory where RDF file(s) are stored.
145            Default root of the data directory.
146
147        Returns
148        -------
149        success : bool
150            Whether the loading was successfull or not.
151        """
152        return self.load_parallel(rdf_file, 1, rdf_dir)

Load an RDF file into Virtuoso.

Currently, only N-Triples files are supported.

Parameters
  • rdf_file (str): Name of the RDF file to load.
  • rdf_dir (str): Name of the directory where RDF file(s) are stored. Default root of the data directory.
Returns
  • success (bool): Whether the loading was successful or not.
def load_parallel(self, rdf_file: str, cores: int, rdf_dir: str = '') -> bool:
154    def load_parallel(self, rdf_file: str, cores: int,
155                      rdf_dir: str = '') -> bool:
156        """Load an RDF file into Virtuoso in parallel.
157
158        Currently, only N-Triples files are supported.
159
160        Parameters
161        ----------
162        rdf_file : str
163            Name of the RDF file to load.
164        cores : int
165            Number of CPU cores for loading.
166        rdf_dir : str
167            Name of the directory where RDF file(s) are stored.
168            Default root of the data directory.
169
170        Returns
171        -------
172        success : bool
173            Whether the loading was successfull or not.
174        """
175        success = True
176
177        success, logs = self.exec(f'sh -c "ls /usr/share/proj/{rdf_file}"')
178        for line in logs:
179            self._logger.debug(line)
180        if not success:
181            self._logger.error('RDF files do not exist for loading')
182            return False
183
184        # Load directory with data
185        directory = f'/usr/share/proj/{rdf_dir}'
186        success, logs = self.exec('\'isql\' -U dba -P root '
187                                  f'exec="ld_dir(\'{directory}\','
188                                  f'\'{rdf_file}\', '
189                                  '\'http://example.com/graph\');"')
190        for line in logs:
191            self._logger.debug(line)
192        if not success:
193            self._logger.error('ISQL loader query failure')
194            return False
195
196        loader_threads = []
197        self._logger.debug(f'Spawning {cores} loader threads')
198        for i in range(cores):
199            t = Thread(target=_spawn_loader, args=(self,), daemon=True)
200            t.start()
201            loader_threads.append(t)
202
203        for t in loader_threads:
204            t.join()
205        self._logger.debug(f'Loading finished with {cores} threads')
206
207        # Re-enable checkpoints and scheduler which are disabled automatically
208        # after loading RDF with rdf_loader_run()
209        success, logs = self.exec('\'isql\' -U dba -P root exec="checkpoint;"')
210        for line in logs:
211            self._logger.debug(line)
212        if not success:
213            self._logger.error('ISQL re-enable checkpoints query failure')
214            return False
215
216        success, logs = self.exec('\'isql\' -U dba -P root '
217                                  'exec="checkpoint_interval(60);"')
218        for line in logs:
219            self._logger.debug(line)
220        if not success:
221            self._logger.error('ISQL checkpoint interval query failure')
222            return False
223
224        success, logs = self.exec('\'isql\' -U dba -P root '
225                                  'exec="scheduler_interval(10);"')
226        for line in logs:
227            self._logger.debug(line)
228        if not success:
229            self._logger.error('ISQL scheduler interval query failure')
230            return False
231
232        return success

Load an RDF file into Virtuoso in parallel.

Currently, only N-Triples files are supported.

Parameters
  • rdf_file (str): Name of the RDF file to load.
  • cores (int): Number of CPU cores for loading.
  • rdf_dir (str): Name of the directory where RDF file(s) are stored. Default root of the data directory.
Returns
  • success (bool): Whether the loading was successful or not.
def stop(self) -> bool:
234    def stop(self) -> bool:
235        """Stop Virtuoso.
236
237        Drops all triples in Virtuoso before stopping its container.
238
239        Returns
240        -------
241        success : bool
242            Whether stopping Virtuoso was successfull or not.
243        """
244        # Drop loaded triples
245        success, logs = self.exec('\'isql\' -U dba -P root '
246                                  'exec="delete from DB.DBA.load_list;"')
247        for line in logs:
248            self._logger.debug(line)
249        if not success:
250            self._logger.error('ISQL delete load list query failure')
251            return False
252
253        success, logs = self.exec('\'isql\' -U dba -P root '
254                                  'exec="rdf_global_reset();"')
255        for line in logs:
256            self._logger.debug(line)
257        if not success:
258            self._logger.error('ISQL RDF global reset query failure')
259            return False
260        return super().stop()

Stop Virtuoso.

Drops all triples in Virtuoso before stopping its container.

Returns
  • success (bool): Whether stopping Virtuoso was successful or not.
endpoint: str
262    @property
263    def endpoint(self) -> str:
264        """SPARQL endpoint URL"""
265        return self._endpoint

SPARQL endpoint URL

headers: Dict[str, Dict[str, str]]
267    @property
268    def headers(self) -> Dict[str, Dict[str, str]]:
269        """HTTP headers of SPARQL queries for serialization formats.
270
271        Only supported serialization formats are included in the dictionary.
272        Currently, the following formats are supported:
273        - N-Triples
274        - Turtle
275        - CSV
276        - RDF/JSON
277        - RDF/XML
278        - JSON-LD
279
280        Returns
281        -------
282        headers : dict
283            Dictionary of headers to use for each serialization format.
284        """
285        headers = {}
286        headers['ntriples'] = {'Accept': 'text/ntriples'}
287        headers['turtle'] = {'Accept': 'text/turtle'}
288        headers['rdfxml'] = {'Accept': 'application/rdf+xml'}
289        headers['rdfjson'] = {'Accept': 'application/rdf+json'}
290        headers['csv'] = {'Accept': 'text/csv'}
291        headers['jsonld'] = {'Accept': 'application/ld+json'}
292        return headers

HTTP headers of SPARQL queries for serialization formats.

Only supported serialization formats are included in the dictionary. Currently, the following formats are supported:

  • N-Triples
  • Turtle
  • CSV
  • RDF/JSON
  • RDF/XML
  • JSON-LD
Returns
  • headers (dict): Dictionary of headers to use for each serialization format.