bench_executor.virtuoso
Virtuoso is a secure and high-performance platform for modern data access, integration, virtualization, and multi-model data management (tables & graphs) based on innovative support of existing open standards (e.g., SQL, SPARQL, and GraphQL).
Website: https://virtuoso.openlinksw.com/
Repository: https://github.com/openlink/virtuoso-opensource
#!/usr/bin/env python3

"""
Virtuoso is a secure and high-performance platform for modern data access,
integration, virtualization, and multi-model data management (tables & graphs)
based on innovative support of existing open standards
(e.g., SQL, SPARQL, and GraphQL).

**Website**: https://virtuoso.openlinksw.com/<br>
**Repository**: https://github.com/openlink/virtuoso-opensource
"""

import os
import tempfile
import psutil
from typing import Dict
from threading import Thread
from bench_executor.container import Container
from bench_executor.logger import Logger

VERSION = '7.2.7'
MAX_ROWS = '10000000'
QUERY_TIMEOUT = '0'  # no limit
MAX_VECTOR_SIZE = '3000000'  # max value is 'around' 3,500,000 from docs
PASSWORD = 'root'
NUMBER_OF_BUFFERS_PER_GB = 85000
MAX_DIRTY_BUFFERS_PER_GB = 65000


def _spawn_loader(container):
    """Thread function to parallel load RDF.

    Parameters
    ----------
    container : Container
        The Virtuoso container on which the RDF loader should run.
    """
    # The loader's exit status is intentionally ignored here: each thread
    # drains the shared load_list and the caller verifies the overall
    # result through the checkpoint queries afterwards.
    container.exec(f'\'isql\' -U dba -P {PASSWORD} '
                   'exec="rdf_loader_run();"')


class Virtuoso(Container):
    """Virtuoso container to execute SPARQL queries"""

    def __init__(self, data_path: str, config_path: str, directory: str,
                 verbose: bool):
        """Creates an instance of the Virtuoso class.

        Parameters
        ----------
        data_path : str
            Path to the data directory of the case.
        config_path : str
            Path to the config directory of the case.
        directory : str
            Path to the directory to store logs.
        verbose : bool
            Enable verbose logs.
        """
        self._data_path = os.path.abspath(data_path)
        self._config_path = os.path.abspath(config_path)
        self._logger = Logger(__name__, directory, verbose)

        tmp_dir = os.path.join(tempfile.gettempdir(), 'virtuoso')
        # umask 0 so the directories are writable by the container user
        os.umask(0)
        os.makedirs(tmp_dir, exist_ok=True)
        os.makedirs(os.path.join(self._data_path, 'virtuoso'), exist_ok=True)
        # Scale Virtuoso's buffer pool with the available system memory.
        memory_gb = psutil.virtual_memory().total / (10**9)
        number_of_buffers = int(memory_gb * NUMBER_OF_BUFFERS_PER_GB)
        max_dirty_buffers = int(memory_gb * MAX_DIRTY_BUFFERS_PER_GB)
        environment = {'DBA_PASSWORD': PASSWORD,
                       'VIRT_SPARQL_ResultSetMaxRows': MAX_ROWS,
                       'VIRT_SPARQL_MaxQueryExecutionTime': QUERY_TIMEOUT,
                       'VIRT_SPARQL_ExecutionTimeout': QUERY_TIMEOUT,
                       'VIRT_SPARQL_MaxQueryCostEstimationTime': QUERY_TIMEOUT,
                       'VIRT_Parameters_MaxVectorSize': MAX_VECTOR_SIZE,
                       'VIRT_Parameters_NumberOfBuffers': number_of_buffers,
                       'VIRT_Parameters_MaxDirtyBuffers': max_dirty_buffers}
        super().__init__(f'kgconstruct/virtuoso:v{VERSION}',
                         'Virtuoso', self._logger,
                         ports={'8890': '8890', '1111': '1111'},
                         environment=environment,
                         volumes=[f'{self._data_path}/shared:/usr/share/proj',
                                  f'{tmp_dir}:/database'])
        self._endpoint = 'http://localhost:8890/sparql'

    def _exec_isql(self, statement: str, error_message: str) -> bool:
        """Execute a single SQL statement through the isql client.

        Parameters
        ----------
        statement : str
            SQL statement to execute, including the trailing ';'.
        error_message : str
            Message logged as an error when the statement fails.

        Returns
        -------
        success : bool
            Whether the statement was executed successfully or not.
        """
        success, logs = self.exec(f'\'isql\' -U dba -P {PASSWORD} '
                                  f'exec="{statement}"')
        for line in logs:
            self._logger.debug(line)
        if not success:
            self._logger.error(error_message)
        return success

    def initialization(self) -> bool:
        """Initialize Virtuoso's database.

        Returns
        -------
        success : bool
            Whether the initialization was successful or not.
        """
        # Virtuoso should start with an initialized database, start Virtuoso
        # if not initialized to avoid the pre-run start during benchmark
        # execution
        if not self.wait_until_ready():
            return False
        return self.stop()

    @property
    def root_mount_directory(self) -> str:
        """Subdirectory in the root directory of the case for Virtuoso.

        Returns
        -------
        subdirectory : str
            Subdirectory of the root directory for Virtuoso.
        """
        return __name__.lower()

    def wait_until_ready(self, command: str = '') -> bool:
        """Wait until Virtuoso is ready to execute SPARQL queries.

        Parameters
        ----------
        command : str
            Command to execute in the Virtuoso container, optional; defaults
            to no command.

        Returns
        -------
        success : bool
            Whether Virtuoso became ready (server online) or not.
        """
        return self.run_and_wait_for_log('Server online at', command=command)

    def load(self, rdf_file: str, rdf_dir: str = '') -> bool:
        """Load an RDF file into Virtuoso.

        Currently, only N-Triples files are supported.

        Parameters
        ----------
        rdf_file : str
            Name of the RDF file to load.
        rdf_dir : str
            Name of the directory where RDF file(s) are stored.
            Defaults to the root of the data directory.

        Returns
        -------
        success : bool
            Whether the loading was successful or not.
        """
        return self.load_parallel(rdf_file, 1, rdf_dir)

    def load_parallel(self, rdf_file: str, cores: int,
                      rdf_dir: str = '') -> bool:
        """Load an RDF file into Virtuoso in parallel.

        Currently, only N-Triples files are supported.

        Parameters
        ----------
        rdf_file : str
            Name of the RDF file to load.
        cores : int
            Number of CPU cores for loading.
        rdf_dir : str
            Name of the directory where RDF file(s) are stored.
            Defaults to the root of the data directory.

        Returns
        -------
        success : bool
            Whether the loading was successful or not.
        """
        # Verify the files exist inside the container before registering them
        success, logs = self.exec(f'sh -c "ls /usr/share/proj/{rdf_file}"')
        for line in logs:
            self._logger.debug(line)
        if not success:
            self._logger.error('RDF files do not exist for loading')
            return False

        # Register the directory with data for loading
        directory = f'/usr/share/proj/{rdf_dir}'
        if not self._exec_isql(f"ld_dir('{directory}','{rdf_file}', "
                               "'http://example.com/graph');",
                               'ISQL loader query failure'):
            return False

        # rdf_loader_run() drains the registered load_list, so running it on
        # multiple threads loads the files in parallel.
        loader_threads = []
        self._logger.debug(f'Spawning {cores} loader threads')
        for _ in range(cores):
            t = Thread(target=_spawn_loader, args=(self,), daemon=True)
            t.start()
            loader_threads.append(t)

        for t in loader_threads:
            t.join()
        self._logger.debug(f'Loading finished with {cores} threads')

        # Re-enable checkpoints and scheduler which are disabled automatically
        # after loading RDF with rdf_loader_run()
        if not self._exec_isql('checkpoint;',
                               'ISQL re-enable checkpoints query failure'):
            return False
        if not self._exec_isql('checkpoint_interval(60);',
                               'ISQL checkpoint interval query failure'):
            return False
        if not self._exec_isql('scheduler_interval(10);',
                               'ISQL scheduler interval query failure'):
            return False

        return True

    def stop(self) -> bool:
        """Stop Virtuoso.

        Drops all triples in Virtuoso before stopping its container.

        Returns
        -------
        success : bool
            Whether stopping Virtuoso was successful or not.
        """
        # Drop loaded triples so the next run starts from a clean database
        if not self._exec_isql('delete from DB.DBA.load_list;',
                               'ISQL delete load list query failure'):
            return False
        if not self._exec_isql('rdf_global_reset();',
                               'ISQL RDF global reset query failure'):
            return False
        return super().stop()

    @property
    def endpoint(self) -> str:
        """SPARQL endpoint URL"""
        return self._endpoint

    @property
    def headers(self) -> Dict[str, Dict[str, str]]:
        """HTTP headers of SPARQL queries for serialization formats.

        Only supported serialization formats are included in the dictionary.
        Currently, the following formats are supported:
        - N-Triples
        - Turtle
        - CSV
        - RDF/JSON
        - RDF/XML
        - JSON-LD

        Returns
        -------
        headers : dict
            Dictionary of headers to use for each serialization format.
        """
        return {
            'ntriples': {'Accept': 'text/ntriples'},
            'turtle': {'Accept': 'text/turtle'},
            'rdfxml': {'Accept': 'application/rdf+xml'},
            'rdfjson': {'Accept': 'application/rdf+json'},
            'csv': {'Accept': 'text/csv'},
            'jsonld': {'Accept': 'application/ld+json'},
        }


if __name__ == '__main__':
    print(f'ℹ️ Starting up Virtuoso v{VERSION}...')
    v = Virtuoso('data', 'config', 'log', True)
    v.wait_until_ready()
    # input() waits for Enter, not an arbitrary key
    input('ℹ️ Press Enter to stop')
    v.stop()
    print('ℹ️ Stopped')
class Virtuoso(Container):
    """Virtuoso container to execute SPARQL queries"""

    def __init__(self, data_path: str, config_path: str, directory: str,
                 verbose: bool):
        """Creates an instance of the Virtuoso class.

        Parameters
        ----------
        data_path : str
            Path to the data directory of the case.
        config_path : str
            Path to the config directory of the case.
        directory : str
            Path to the directory to store logs.
        verbose : bool
            Enable verbose logs.
        """
        self._data_path = os.path.abspath(data_path)
        self._config_path = os.path.abspath(config_path)
        self._logger = Logger(__name__, directory, verbose)

        tmp_dir = os.path.join(tempfile.gettempdir(), 'virtuoso')
        # umask 0 so the directories are writable by the container user
        os.umask(0)
        os.makedirs(tmp_dir, exist_ok=True)
        os.makedirs(os.path.join(self._data_path, 'virtuoso'), exist_ok=True)
        # Scale Virtuoso's buffer pool with the available system memory.
        memory_gb = psutil.virtual_memory().total / (10**9)
        number_of_buffers = int(memory_gb * NUMBER_OF_BUFFERS_PER_GB)
        max_dirty_buffers = int(memory_gb * MAX_DIRTY_BUFFERS_PER_GB)
        environment = {'DBA_PASSWORD': PASSWORD,
                       'VIRT_SPARQL_ResultSetMaxRows': MAX_ROWS,
                       'VIRT_SPARQL_MaxQueryExecutionTime': QUERY_TIMEOUT,
                       'VIRT_SPARQL_ExecutionTimeout': QUERY_TIMEOUT,
                       'VIRT_SPARQL_MaxQueryCostEstimationTime': QUERY_TIMEOUT,
                       'VIRT_Parameters_MaxVectorSize': MAX_VECTOR_SIZE,
                       'VIRT_Parameters_NumberOfBuffers': number_of_buffers,
                       'VIRT_Parameters_MaxDirtyBuffers': max_dirty_buffers}
        super().__init__(f'kgconstruct/virtuoso:v{VERSION}',
                         'Virtuoso', self._logger,
                         ports={'8890': '8890', '1111': '1111'},
                         environment=environment,
                         volumes=[f'{self._data_path}/shared:/usr/share/proj',
                                  f'{tmp_dir}:/database'])
        self._endpoint = 'http://localhost:8890/sparql'

    def _exec_isql(self, statement: str, error_message: str) -> bool:
        """Execute a single SQL statement through the isql client.

        Parameters
        ----------
        statement : str
            SQL statement to execute, including the trailing ';'.
        error_message : str
            Message logged as an error when the statement fails.

        Returns
        -------
        success : bool
            Whether the statement was executed successfully or not.
        """
        success, logs = self.exec(f'\'isql\' -U dba -P {PASSWORD} '
                                  f'exec="{statement}"')
        for line in logs:
            self._logger.debug(line)
        if not success:
            self._logger.error(error_message)
        return success

    def initialization(self) -> bool:
        """Initialize Virtuoso's database.

        Returns
        -------
        success : bool
            Whether the initialization was successful or not.
        """
        # Virtuoso should start with an initialized database, start Virtuoso
        # if not initialized to avoid the pre-run start during benchmark
        # execution
        if not self.wait_until_ready():
            return False
        return self.stop()

    @property
    def root_mount_directory(self) -> str:
        """Subdirectory in the root directory of the case for Virtuoso.

        Returns
        -------
        subdirectory : str
            Subdirectory of the root directory for Virtuoso.
        """
        return __name__.lower()

    def wait_until_ready(self, command: str = '') -> bool:
        """Wait until Virtuoso is ready to execute SPARQL queries.

        Parameters
        ----------
        command : str
            Command to execute in the Virtuoso container, optional; defaults
            to no command.

        Returns
        -------
        success : bool
            Whether Virtuoso became ready (server online) or not.
        """
        return self.run_and_wait_for_log('Server online at', command=command)

    def load(self, rdf_file: str, rdf_dir: str = '') -> bool:
        """Load an RDF file into Virtuoso.

        Currently, only N-Triples files are supported.

        Parameters
        ----------
        rdf_file : str
            Name of the RDF file to load.
        rdf_dir : str
            Name of the directory where RDF file(s) are stored.
            Defaults to the root of the data directory.

        Returns
        -------
        success : bool
            Whether the loading was successful or not.
        """
        return self.load_parallel(rdf_file, 1, rdf_dir)

    def load_parallel(self, rdf_file: str, cores: int,
                      rdf_dir: str = '') -> bool:
        """Load an RDF file into Virtuoso in parallel.

        Currently, only N-Triples files are supported.

        Parameters
        ----------
        rdf_file : str
            Name of the RDF file to load.
        cores : int
            Number of CPU cores for loading.
        rdf_dir : str
            Name of the directory where RDF file(s) are stored.
            Defaults to the root of the data directory.

        Returns
        -------
        success : bool
            Whether the loading was successful or not.
        """
        # Verify the files exist inside the container before registering them
        success, logs = self.exec(f'sh -c "ls /usr/share/proj/{rdf_file}"')
        for line in logs:
            self._logger.debug(line)
        if not success:
            self._logger.error('RDF files do not exist for loading')
            return False

        # Register the directory with data for loading
        directory = f'/usr/share/proj/{rdf_dir}'
        if not self._exec_isql(f"ld_dir('{directory}','{rdf_file}', "
                               "'http://example.com/graph');",
                               'ISQL loader query failure'):
            return False

        # rdf_loader_run() drains the registered load_list, so running it on
        # multiple threads loads the files in parallel.
        loader_threads = []
        self._logger.debug(f'Spawning {cores} loader threads')
        for _ in range(cores):
            t = Thread(target=_spawn_loader, args=(self,), daemon=True)
            t.start()
            loader_threads.append(t)

        for t in loader_threads:
            t.join()
        self._logger.debug(f'Loading finished with {cores} threads')

        # Re-enable checkpoints and scheduler which are disabled automatically
        # after loading RDF with rdf_loader_run()
        if not self._exec_isql('checkpoint;',
                               'ISQL re-enable checkpoints query failure'):
            return False
        if not self._exec_isql('checkpoint_interval(60);',
                               'ISQL checkpoint interval query failure'):
            return False
        if not self._exec_isql('scheduler_interval(10);',
                               'ISQL scheduler interval query failure'):
            return False

        return True

    def stop(self) -> bool:
        """Stop Virtuoso.

        Drops all triples in Virtuoso before stopping its container.

        Returns
        -------
        success : bool
            Whether stopping Virtuoso was successful or not.
        """
        # Drop loaded triples so the next run starts from a clean database
        if not self._exec_isql('delete from DB.DBA.load_list;',
                               'ISQL delete load list query failure'):
            return False
        if not self._exec_isql('rdf_global_reset();',
                               'ISQL RDF global reset query failure'):
            return False
        return super().stop()

    @property
    def endpoint(self) -> str:
        """SPARQL endpoint URL"""
        return self._endpoint

    @property
    def headers(self) -> Dict[str, Dict[str, str]]:
        """HTTP headers of SPARQL queries for serialization formats.

        Only supported serialization formats are included in the dictionary.
        Currently, the following formats are supported:
        - N-Triples
        - Turtle
        - CSV
        - RDF/JSON
        - RDF/XML
        - JSON-LD

        Returns
        -------
        headers : dict
            Dictionary of headers to use for each serialization format.
        """
        return {
            'ntriples': {'Accept': 'text/ntriples'},
            'turtle': {'Accept': 'text/turtle'},
            'rdfxml': {'Accept': 'application/rdf+xml'},
            'rdfjson': {'Accept': 'application/rdf+json'},
            'csv': {'Accept': 'text/csv'},
            'jsonld': {'Accept': 'application/ld+json'},
        }
Virtuoso container to execute SPARQL queries
46 def __init__(self, data_path: str, config_path: str, directory: str, 47 verbose: bool): 48 """Creates an instance of the Virtuoso class. 49 50 Parameters 51 ---------- 52 data_path : str 53 Path to the data directory of the case. 54 config_path : str 55 Path to the config directory of the case. 56 directory : str 57 Path to the directory to store logs. 58 verbose : bool 59 Enable verbose logs. 60 """ 61 self._data_path = os.path.abspath(data_path) 62 self._config_path = os.path.abspath(config_path) 63 self._logger = Logger(__name__, directory, verbose) 64 65 tmp_dir = os.path.join(tempfile.gettempdir(), 'virtuoso') 66 os.umask(0) 67 os.makedirs(tmp_dir, exist_ok=True) 68 os.makedirs(os.path.join(self._data_path, 'virtuoso'), exist_ok=True) 69 number_of_buffers = int(psutil.virtual_memory().total / (10**9) 70 * NUMBER_OF_BUFFERS_PER_GB) 71 max_dirty_buffers = int(psutil.virtual_memory().total / (10**9) 72 * MAX_DIRTY_BUFFERS_PER_GB) 73 environment = {'DBA_PASSWORD': PASSWORD, 74 'VIRT_SPARQL_ResultSetMaxRows': MAX_ROWS, 75 'VIRT_SPARQL_MaxQueryExecutionTime': QUERY_TIMEOUT, 76 'VIRT_SPARQL_ExecutionTimeout': QUERY_TIMEOUT, 77 'VIRT_SPARQL_MaxQueryCostEstimationTime': QUERY_TIMEOUT, 78 'VIRT_Parameters_MaxVectorSize': MAX_VECTOR_SIZE, 79 'VIRT_Parameters_NumberOfBuffers': number_of_buffers, 80 'VIRT_Parameters_MaxDirtyBuffers': max_dirty_buffers} 81 super().__init__(f'kgconstruct/virtuoso:v{VERSION}', 82 'Virtuoso', self._logger, 83 ports={'8890': '8890', '1111': '1111'}, 84 environment=environment, 85 volumes=[f'{self._data_path}/shared:/usr/share/proj', 86 f'{tmp_dir}:/database']) 87 self._endpoint = 'http://localhost:8890/sparql'
Creates an instance of the Virtuoso class.
Parameters
- data_path (str): Path to the data directory of the case.
- config_path (str): Path to the config directory of the case.
- directory (str): Path to the directory to store logs.
- verbose (bool): Enable verbose logs.
89 def initialization(self) -> bool: 90 """Initialize Virtuoso's database. 91 92 Returns 93 ------- 94 success : bool 95 Whether the initialization was successfull or not. 96 """ 97 # Virtuoso should start with a initialized database, start Virtuoso 98 # if not initialized to avoid the pre-run start during benchmark 99 # execution 100 success = self.wait_until_ready() 101 if not success: 102 return False 103 success = self.stop() 104 105 return success
Initialize Virtuoso's database.
Returns
- success (bool): Whether the initialization was successful or not.
107 @property 108 def root_mount_directory(self) -> str: 109 """Subdirectory in the root directory of the case for Virtuoso. 110 111 Returns 112 ------- 113 subdirectory : str 114 Subdirectory of the root directory for Virtuoso. 115 """ 116 return __name__.lower()
Subdirectory in the root directory of the case for Virtuoso.
Returns
- subdirectory (str): Subdirectory of the root directory for Virtuoso.
118 def wait_until_ready(self, command: str = '') -> bool: 119 """Wait until Virtuoso is ready to execute SPARQL queries. 120 121 Parameters 122 ---------- 123 command : str 124 Command to execute in the Virtuoso container, optionally, defaults 125 to no command. 126 127 Returns 128 ------- 129 success : bool 130 Whether the Virtuoso was initialized successfull or not. 131 """ 132 return self.run_and_wait_for_log('Server online at', command=command)
Wait until Virtuoso is ready to execute SPARQL queries.
Parameters
- command (str): Command to execute in the Virtuoso container; optional, defaults to no command.
Returns
- success (bool): Whether Virtuoso was initialized successfully or not.
134 def load(self, rdf_file: str, rdf_dir: str = '') -> bool: 135 """Load an RDF file into Virtuoso. 136 137 Currently, only N-Triples files are supported. 138 139 Parameters 140 ---------- 141 rdf_file : str 142 Name of the RDF file to load. 143 rdf_dir : str 144 Name of the directory where RDF file(s) are stored. 145 Default root of the data directory. 146 147 Returns 148 ------- 149 success : bool 150 Whether the loading was successfull or not. 151 """ 152 return self.load_parallel(rdf_file, 1, rdf_dir)
Load an RDF file into Virtuoso.
Currently, only N-Triples files are supported.
Parameters
- rdf_file (str): Name of the RDF file to load.
- rdf_dir (str): Name of the directory where RDF file(s) are stored. Defaults to the root of the data directory.
Returns
- success (bool): Whether the loading was successful or not.
154 def load_parallel(self, rdf_file: str, cores: int, 155 rdf_dir: str = '') -> bool: 156 """Load an RDF file into Virtuoso in parallel. 157 158 Currently, only N-Triples files are supported. 159 160 Parameters 161 ---------- 162 rdf_file : str 163 Name of the RDF file to load. 164 cores : int 165 Number of CPU cores for loading. 166 rdf_dir : str 167 Name of the directory where RDF file(s) are stored. 168 Default root of the data directory. 169 170 Returns 171 ------- 172 success : bool 173 Whether the loading was successfull or not. 174 """ 175 success = True 176 177 success, logs = self.exec(f'sh -c "ls /usr/share/proj/{rdf_file}"') 178 for line in logs: 179 self._logger.debug(line) 180 if not success: 181 self._logger.error('RDF files do not exist for loading') 182 return False 183 184 # Load directory with data 185 directory = f'/usr/share/proj/{rdf_dir}' 186 success, logs = self.exec('\'isql\' -U dba -P root ' 187 f'exec="ld_dir(\'{directory}\',' 188 f'\'{rdf_file}\', ' 189 '\'http://example.com/graph\');"') 190 for line in logs: 191 self._logger.debug(line) 192 if not success: 193 self._logger.error('ISQL loader query failure') 194 return False 195 196 loader_threads = [] 197 self._logger.debug(f'Spawning {cores} loader threads') 198 for i in range(cores): 199 t = Thread(target=_spawn_loader, args=(self,), daemon=True) 200 t.start() 201 loader_threads.append(t) 202 203 for t in loader_threads: 204 t.join() 205 self._logger.debug(f'Loading finished with {cores} threads') 206 207 # Re-enable checkpoints and scheduler which are disabled automatically 208 # after loading RDF with rdf_loader_run() 209 success, logs = self.exec('\'isql\' -U dba -P root exec="checkpoint;"') 210 for line in logs: 211 self._logger.debug(line) 212 if not success: 213 self._logger.error('ISQL re-enable checkpoints query failure') 214 return False 215 216 success, logs = self.exec('\'isql\' -U dba -P root ' 217 'exec="checkpoint_interval(60);"') 218 for line in logs: 219 
self._logger.debug(line) 220 if not success: 221 self._logger.error('ISQL checkpoint interval query failure') 222 return False 223 224 success, logs = self.exec('\'isql\' -U dba -P root ' 225 'exec="scheduler_interval(10);"') 226 for line in logs: 227 self._logger.debug(line) 228 if not success: 229 self._logger.error('ISQL scheduler interval query failure') 230 return False 231 232 return success
Load an RDF file into Virtuoso in parallel.
Currently, only N-Triples files are supported.
Parameters
- rdf_file (str): Name of the RDF file to load.
- cores (int): Number of CPU cores for loading.
- rdf_dir (str): Name of the directory where RDF file(s) are stored. Defaults to the root of the data directory.
Returns
- success (bool): Whether the loading was successful or not.
234 def stop(self) -> bool: 235 """Stop Virtuoso. 236 237 Drops all triples in Virtuoso before stopping its container. 238 239 Returns 240 ------- 241 success : bool 242 Whether stopping Virtuoso was successfull or not. 243 """ 244 # Drop loaded triples 245 success, logs = self.exec('\'isql\' -U dba -P root ' 246 'exec="delete from DB.DBA.load_list;"') 247 for line in logs: 248 self._logger.debug(line) 249 if not success: 250 self._logger.error('ISQL delete load list query failure') 251 return False 252 253 success, logs = self.exec('\'isql\' -U dba -P root ' 254 'exec="rdf_global_reset();"') 255 for line in logs: 256 self._logger.debug(line) 257 if not success: 258 self._logger.error('ISQL RDF global reset query failure') 259 return False 260 return super().stop()
Stop Virtuoso.
Drops all triples in Virtuoso before stopping its container.
Returns
- success (bool): Whether stopping Virtuoso was successful or not.
262 @property 263 def endpoint(self) -> str: 264 """SPARQL endpoint URL""" 265 return self._endpoint
SPARQL endpoint URL
267 @property 268 def headers(self) -> Dict[str, Dict[str, str]]: 269 """HTTP headers of SPARQL queries for serialization formats. 270 271 Only supported serialization formats are included in the dictionary. 272 Currently, the following formats are supported: 273 - N-Triples 274 - Turtle 275 - CSV 276 - RDF/JSON 277 - RDF/XML 278 - JSON-LD 279 280 Returns 281 ------- 282 headers : dict 283 Dictionary of headers to use for each serialization format. 284 """ 285 headers = {} 286 headers['ntriples'] = {'Accept': 'text/ntriples'} 287 headers['turtle'] = {'Accept': 'text/turtle'} 288 headers['rdfxml'] = {'Accept': 'application/rdf+xml'} 289 headers['rdfjson'] = {'Accept': 'application/rdf+json'} 290 headers['csv'] = {'Accept': 'text/csv'} 291 headers['jsonld'] = {'Accept': 'application/ld+json'} 292 return headers
HTTP headers of SPARQL queries for serialization formats.
Only supported serialization formats are included in the dictionary. Currently, the following formats are supported:
- N-Triples
- Turtle
- CSV
- RDF/JSON
- RDF/XML
- JSON-LD
Returns
- headers (dict): Dictionary of headers to use for each serialization format.