#!/bin/sh
# Trigger birdhousecrawler on local Birdhouse host.
#
# Requires temporarily opening the Thredds "testdata/secure/" path on the local
# Birdhouse host to the anonymous group.
#
# birdhousecrawler is a method of the catalog WPS service to index Thredds
# catalog into Solr DB for quick searching.
#
# To crawl only 1 file:
#   trigger-birdhousecrawler target_files=birdhouse/testdata/secure/tasmax_Amon_MPI-ESM-MR_rcp45_r2i1p1_200601-200612.nc
#
# To crawl only 1 dir:
#   trigger-birdhousecrawler target_files=birdhouse/testdata
#
# Set the env var BIRDHOUSE_CRAWLER_HOST to target a different Birdhouse host.

# Resolve where this script lives; fall back to realpath when `readlink -f`
# fails.
THIS_FILE="$(readlink -f "$0" || realpath "$0")"
THIS_DIR="$(dirname "${THIS_FILE}")"

# Keep a caller-provided COMPOSE_DIR; otherwise use the parent directory of
# the directory containing this script.
: "${COMPOSE_DIR:=$(dirname "${THIS_DIR}")}"

# When the config helper exists, source it and call read_configs, which is
# expected to provide BIRDHOUSE_FQDN. Without the helper, nothing is loaded.
[ ! -f "${COMPOSE_DIR}/read-configs.include.sh" ] || {
    . "${COMPOSE_DIR}/read-configs.include.sh"
    read_configs
}

# A non-empty BIRDHOUSE_CRAWLER_HOST takes precedence; otherwise target
# BIRDHOUSE_FQDN.
: "${BIRDHOUSE_CRAWLER_HOST:=${BIRDHOUSE_FQDN}}"

# Send the WPS "execute" request for the crawler process to the catalog
# service on port 8086 of BIRDHOUSE_CRAWLER_HOST.
#
# All command-line arguments are joined with "$*" (a single space between
# arguments under the default IFS) and appended verbatim as the DataInputs
# value, e.g. "target_files=birdhouse/testdata".
#
# Exits with status 1 when no target host is known, and with curl's own exit
# status when curl itself fails.

# Without a host the URL below would be "http://:8086/...", which cannot work.
if [ -z "${BIRDHOUSE_CRAWLER_HOST}" ]; then
    echo "ERROR: no crawler host: set BIRDHOUSE_CRAWLER_HOST or BIRDHOUSE_FQDN." >&2
    exit 1
fi

# Trace the exact curl command so the requested URL is visible to the user.
set -x

curl --include "http://${BIRDHOUSE_CRAWLER_HOST}:8086/pywps?service=WPS&request=execute&version=1.0.0&identifier=pavicrawler&storeExecuteResponse=true&status=true&DataInputs=$*"
# Capture immediately, before any other command overwrites $?.
CURL_EXIT_CODE=$?

set +x

# Stop here on failure instead of continuing as if the crawl had been started.
if [ "${CURL_EXIT_CODE}" -ne 0 ]; then
    echo "ERROR: curl exited with status ${CURL_EXIT_CODE}; crawl request was not completed." >&2
    exit "${CURL_EXIT_CODE}"
fi

# Tell the user where to follow the crawl status and where to inspect the
# indexed content afterwards. The text starts and ends with an empty line.
cat <<EOF

NOTE the
statusLocation="https://HOST/wpsoutputs/catalog/e31a4914-16e8-11ea-aab9-0242ac130014.xml"
returned in the XML body of the curl command. The status of the crawl,
whether ongoing, failed or success will be in that link.

Once crawler is done, go check the Solr DB at
http://${BIRDHOUSE_CRAWLER_HOST}:8983/solr/#/${THREDDS_SERVICE_DATA_URL_PATH}/query for content inserted by the
crawler.  Just click on "Execute Query".

EOF
