#!/usr/bin/php
<?php

/*
 * The MIT License
 *
 * Copyright 2023 Austrian Centre for Digital Humanities.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

// Find the closest directory - starting from the one containing this script and
// going up the directory tree - which contains the "vendor" directory and load
// the composer autoloader from there.
$composerDir = realpath(__DIR__);
while ($composerDir !== false && !file_exists("$composerDir/vendor")) {
    $parentDir   = realpath("$composerDir/..");
    // At the filesystem root the parent resolves to the very same directory
    // (realpath() doesn't return false there), so stop explicitly instead of
    // looping forever.
    $composerDir = $parentDir === $composerDir ? false : $parentDir;
}
if ($composerDir === false) {
    fwrite(STDERR, "Composer's vendor directory not found in " . __DIR__ . " or any of its parent directories\n");
    exit(1);
}
require_once "$composerDir/vendor/autoload.php";

use acdhOeaw\arche\lib\schema\Ontology;
use acdhOeaw\arche\lib\Repo;
use zozlak\logging\Log;
use zozlak\logging\LogMultiplexer;
use Psr\Log\LogLevel;
use zozlak\RdfConstants as RDF;
use zozlak\argparse\ArgumentParser;
use acdhOeaw\UriNormalizerRetryConfig;
use rdfInterface\QuadInterface;
use quickRdf\RdfNamespace;
use quickRdfIo\Util as RdfIoUtil;
use acdhOeaw\arche\metadataCrawler\MetadataCrawler;
use acdhOeaw\arche\metadataCrawler\MetadataChecker;
use acdhOeaw\arche\metadataCrawler\MetadataCrawlerException;

// Command line interface definition.
$parser    = new ArgumentParser();
$storeTrue = ArgumentParser::ACTION_STORE_TRUE;

// optional parameters
$parser->addArgument('--defaultLang', default: 'und');
$parser->addArgument('--filecheckerReportDir');
$parser->addArgument('--noCheck', action: $storeTrue);
$parser->addArgument('--noCheckProgress', action: $storeTrue);
$parser->addArgument('--repositoryUrl', default: 'https://arche.acdh.oeaw.ac.at/api/');
$parser->addArgument('--retryAttempts', default: 3);
$parser->addArgument('--retryDelay', default: 2);
$parser->addArgument('--logFile', default: 'php://stdout');
$parser->addArgument('--errorLog', help: 'Path to a CSV log storing only the resulting metadata check errors');
$parser->addArgument('--verbose', action: $storeTrue);
$parser->addArgument('--tmpDir', default: '/tmp');

// positional parameters - they are registered in exactly this order
foreach (['metadataDir', 'outputFile', 'dataDir', 'idPrefix'] as $positionalName) {
    $parser->addArgument($positionalName);
}

$args = $parser->parseArgs();

// Main log - written to the --logFile location, with debug messages only in the verbose mode.
$logLevel = $args->verbose ? LogLevel::DEBUG : LogLevel::INFO;
$log      = new LogMultiplexer();
$log->addLog(new Log($args->logFile, $logLevel));

// The filechecker output (fileList.json) has to be available in the metadata directory.
// If the filechecker report directory was given and contains the file, copy it there first.
$fileListPath = $args->metadataDir . '/fileList.json';
$reportDir    = $args->filecheckerReportDir ?? '';
if (!empty($reportDir)) {
    $reportFileList = $reportDir . '/fileList.json';
    if (file_exists($reportFileList)) {
        copy($reportFileList, $fileListPath);
    }
}
if (file_exists($fileListPath) === false) {
    $log->error("Metadata directory $args->metadataDir doesn't contain the filechecker output (fileList.json)");
    exit(1);
}

// Main workflow: crawl and merge the metadata, optionally check them and save
// the result as a Turtle file.
//
// Exit status:
// - 0 - success (no check errors or checks skipped with --noCheck)
// - 2 - the output was written but the metadata check reported errors
// - MetadataCrawlerException code (or 1 if the code is not a usable exit status)
//   when the crawler fails
$repo     = Repo::factoryFromUrl($args->repositoryUrl);
// NOTE(review): 36000 is presumably the ontology cache time-to-live in seconds - confirm against Ontology::factoryRest()
$ontology = Ontology::factoryRest($args->repositoryUrl, $args->tmpDir . '/ontology.cache', 36000);

try {
    $log->info("----------------------------------------");
    $log->info("Reading and merging metadata");
    $log->info("----------------------------------------");
    $crawler = new MetadataCrawler($args->metadataDir, $ontology, $repo->getSchema(), $args->idPrefix, $args->dataDir, $args->defaultLang, $log);
    $meta    = $crawler->crawl();

    // drop the graph part of every quad
    $meta = $meta->map(fn(QuadInterface $q) => $q->withGraph(null));

    $noErrors = true;
    if (!$args->noCheck) {
        if (!empty($args->errorLog)) {
            // start with a fresh error log containing only the CSV header
            if (file_exists($args->errorLog)) {
                unlink($args->errorLog);
            }
            $errLog = new Log($args->errorLog, LogLevel::ERROR, "{MESSAGE}");
            $errLog->error('resource;error message');
            // from now on messages sent to $log go to the error log as well
            // (the error log itself is created with the ERROR level)
            $log->addLog($errLog);
        }

        $log->info("----------------------------------------");
        $log->info("Checking merged metadata");
        $log->info("----------------------------------------");
        $retryCfg = new UriNormalizerRetryConfig($args->retryAttempts, $args->retryDelay, UriNormalizerRetryConfig::SCALE_POWER);
        $checker  = new MetadataChecker($ontology, $repo->getSchema(), $log, $retryCfg);
        // the second argument is true unless --noCheckProgress was given
        $noErrors = $checker->check($meta, !$args->noCheckProgress);
        if (!$noErrors) {
            $log->error("Errors found! Please inspect the output above");
        }
    }

    // the output is saved even if the check reported errors
    $log->info("----------------------------------------");
    $log->info("Saving the output");
    $log->info("----------------------------------------");
    $nmsp = new RdfNamespace();
    $nmsp->add('https://vocabs.acdh.oeaw.ac.at/schema#', 'acdh');
    $nmsp->add('https://id.acdh.oeaw.ac.at/', 'acdhi');
    $nmsp->add(RDF::NMSP_RDF, 'rdf');
    RdfIoUtil::serialize($meta, 'text/turtle', $args->outputFile, $nmsp);
    $log->info("Output written to " . realpath($args->outputFile));

    exit($noErrors ? 0 : 2);
} catch (MetadataCrawlerException $e) {
    $log->error($e->getMessage());
    // An exception created without an explicit code reports 0 which would make
    // the script signal a success. Codes outside of the 1-254 range are not
    // reliably passed to the calling process either. Fall back to a generic
    // failure status in such cases.
    $exitCode = $e->getCode();
    exit(is_int($exitCode) && $exitCode >= 1 && $exitCode <= 254 ? $exitCode : 1);
}
