#!/bin/sh

# Operations:
# 1. Register WPS providers into Weaver to provide their OGC-API interface.
# 2. Validate that Weaver workers are ready to receive tasks.
#
# 1. WPS provider registration
# -----------------------------
#
# The script first authenticates in Magpie to obtain authorization for requests, as everything could be protected.
# Afterwards, there is a quick validation that Weaver is up and ready to receive WPS providers registration requests.
# Then, each provider defined in 'WEAVER_WPS_PROVIDERS' is registered iteratively.
# All requests have a timeout to avoid hanging indefinitely.
# Also, the script aborts once the maximum total time configured by 'WEAVER_WPS_PROVIDERS_MAX_TIME' is reached
# without obtaining a valid response.
#
# This script is executed after 'docker-compose up' of the full bird-house stack.
# The reason why WPS providers must be registered after starting everything is because Weaver attempts to query
# them (with GetCapabilities requests) to obtain their metadata.
# Because some WPS containers could take longer to start, it is otherwise not guaranteed that they would be ready
# to receive those requests before Weaver starts querying them.
#
# Parameters:
#
#   WEAVER_WPS_PROVIDERS:
#       list of provider names (comma or space delimited), all are assumed to be available at
#       "${BIRDHOUSE_PROXY_SCHEME}://${BIRDHOUSE_FQDN_PUBLIC}${TWITCHER_PROTECTED_PATH}/<provider-name>"
#   WEAVER_WPS_PROVIDERS_MAX_TIME:
#       limit script execution up to a maximum of this number of seconds
#   WEAVER_WPS_PROVIDERS_RETRY_COUNT:
#       number of permitted retries to register a given WPS provider
#   WEAVER_WPS_PROVIDERS_RETRY_AFTER:
#       number of seconds between each retry request as needed
#
# Following configurations are expected to be inherited from bird-house/weaver-component env.local/default.env:
#   - MAGPIE_ADMIN_USERNAME
#   - MAGPIE_ADMIN_PASSWORD
#   - BIRDHOUSE_FQDN_PUBLIC
#   - TWITCHER_PROTECTED_PATH
#   - WEAVER_MANAGER_NAME
#
# 2. Worker tasks validation
# -----------------------------
#
# When Weaver and its Celery workers are started, tasks often fail to properly register themselves at startup.
# In order to ensure everything is properly configured and ready to receive jobs, Celery checks are employed to
# validate that tasks are known by each component (webapp and workers), and ready to submit/receive them.
# If any of the webapp/worker is not ready, restart them to retry detecting tasks. At that point, it is expected
# that one of the two should have finished registering tasks, and it is just a matter of synchronizing them.
#
# See also the 'celery-healthcheck' script that does the same periodic verification while the images are running,
# but it will not automatically restart them.
#
# Parameters:
#
#   BIRDHOUSE_LOG_DIR (optional, default=/tmp/birdhouse-compose):
#     Location to log results from celery healthcheck outputs.
#

# in case parent script called with debug (sh -x <script>), hide output for this script and reset on exit
# this is important, otherwise curl logs will leak tokens!
# snapshot of the current shell options, saved as 'set' commands that can be evaluated later to restore them
old_state=$(set +o)
# if this script needs live debugging, comment out following line and call the script with (sh -x <script>)
set +x

# Restore the shell options that were captured in 'old_state' when this script started.
# Verbose and trace modes are switched off first, then the saved 'set' commands are evaluated.
reset_state() {
  set +v +x
  eval "${old_state}"
}

# logging
# source the shared logging helpers only when no 'log' command is already available in this shell
command -v log >/dev/null 2>&1 || . "${COMPOSE_DIR}/scripts/logging.include.sh"

# tag placed in front of the messages logged by this script
PREFIX="[Weaver] "

log INFO "${PREFIX}Running: $0"

# endpoints of the services, as reached through the proxy
MAGPIE_URL="${BIRDHOUSE_PROXY_SCHEME}://${BIRDHOUSE_FQDN_PUBLIC}/magpie"
WEAVER_URL="${BIRDHOUSE_PROXY_SCHEME}://${BIRDHOUSE_FQDN_PUBLIC}${TWITCHER_PROTECTED_PATH}/${WEAVER_MANAGER_NAME}"

# defaults applied to parameters that are unset or empty
: "${WEAVER_WPS_PROVIDERS_MAX_TIME:=120}"
: "${WEAVER_WPS_PROVIDERS_RETRY_AFTER:=5}"
: "${WEAVER_WPS_PROVIDERS_RETRY_COUNT:=5}"
: "${WEAVER_CURL_IMAGE:=curlimages/curl:7.87.0}"

# double echo and no quotes used on purpose to remove empty/extra newlines/spaces
WEAVER_WPS_PROVIDERS=$(echo $(echo "${WEAVER_WPS_PROVIDERS}" | tr ',' ' '))
# maximum duration (seconds) given to each individual curl request
REQUEST_TIMEOUT=2

# nothing to do when no provider is requested
[ -n "${WEAVER_WPS_PROVIDERS}" ] || {
    log WARN "${PREFIX}Nothing specified in WEAVER_WPS_PROVIDERS to register WPS remote providers."
    reset_state
    exit 0
}

# negative values are clamped to zero (a negative retry count also disables the retry interval)
if [ "${WEAVER_WPS_PROVIDERS_RETRY_COUNT}" -lt 0 ]; then
    WEAVER_WPS_PROVIDERS_RETRY_COUNT=0
    WEAVER_WPS_PROVIDERS_RETRY_AFTER=0
elif [ "${WEAVER_WPS_PROVIDERS_RETRY_AFTER}" -lt 0 ]; then
    WEAVER_WPS_PROVIDERS_RETRY_AFTER=0
fi

# report the effective configuration
log INFO "${PREFIX}Requested Weaver WPS providers: [${WEAVER_WPS_PROVIDERS}]"
log INFO "${PREFIX}Will retry requests at most for ${WEAVER_WPS_PROVIDERS_MAX_TIME}s"
log INFO "${PREFIX}Will retry registration of each provider up to ${WEAVER_WPS_PROVIDERS_RETRY_COUNT} times"
log INFO "${PREFIX}Will retry registration of each provider with ${WEAVER_WPS_PROVIDERS_RETRY_AFTER}s intervals"

# POSIX portable RNG if RANDOM does not exist on the current shell
RANDOM_NUMBER=${RANDOM:-$(tr -dc 0-9 < /dev/urandom 2>/dev/null | head -c 5)}

# Name given to the containers started by 'curl_cmd', to help identify a docker run in case it hangs.
# It embeds the date/time at which the name was generated (':' and '+' replaced by '_' and 'p')
# followed by the random number above.
DOCKER_RUN_TAG="weaver_post_curl_$(date -Isecond | tr ':+' '_p')_${RANDOM_NUMBER}"

# Run the WEAVER_CURL_IMAGE container with the provided arguments ("$@").
# The container is attached to the '<COMPOSE_PROJECT_NAME>_default' network ('birdhouse' project if undefined),
# is named after DOCKER_RUN_TAG and is removed once it exits.
curl_cmd() {
    docker run \
        --network "${COMPOSE_PROJECT_NAME:-birdhouse}_default" \
        --rm \
        --name "${DOCKER_RUN_TAG}" \
        "${WEAVER_CURL_IMAGE}" \
        "$@"
}

# pull image if missing to avoid mangling output messages on first call
docker pull "${WEAVER_CURL_IMAGE}"

start_time="$(date -u +%s)"

# ------ PHASE 1 ------
# Magpie Authentication

# registration of WPS providers require authenticated access, obtain login from Magpie
# the login request is retried every second until cookies are returned or WEAVER_WPS_PROVIDERS_MAX_TIME is reached
log INFO -n "${PREFIX}Wait for response from Magpie to login [${MAGPIE_URL}]."
while true; do
    # login (output null + cookie-jar '-' redirects output cookie to variable)
    cookie_jar=$( \
        curl_cmd --insecure --silent --location \
             -m "${REQUEST_TIMEOUT}" \
             -o /dev/null \
             -X POST \
             -H "Content-Type: application/json" \
             -d "{\"user_name\": \"${MAGPIE_ADMIN_USERNAME}\", \"password\": \"${MAGPIE_ADMIN_PASSWORD}\"}" \
             --cookie-jar - \
             "${MAGPIE_URL}/signin" \
    )
    # trim excess stuff in cookie_jar pseudo-file (comments, empty lines)
    cookie_jar=$(echo "${cookie_jar}" | grep -v '# ' | grep -v -e '^$')
    # validate that cookie(s) are retrieved (empty if bad-auth or invalid endpoint)
    # there can be "duplicate" cookies ".<host>" and "<host>" returned by Magpie behind proxy
    if [ -n "${cookie_jar}" ]; then
        # cookie-jar lines are tab-delimited and end with the cookie name and value:
        # count and extract the fields with the same (tab) delimiter to build a '<name>=<value>; ' sequence
        cookies=$(
            printf '%s\n' "${cookie_jar}" \
                | awk -F '\t' 'NF >= 2 { printf "%s=%s; ", $(NF - 1), $NF }'
        )
        log INFO -p " OK!"
        break
    fi

    # interrupt if max time reached
    next_time=$(date -u +%s)
    delta_time=$(( next_time - start_time ))
    if [ "${delta_time}" -ge "${WEAVER_WPS_PROVIDERS_MAX_TIME}" ]; then
        msg="Failed to register all providers specified in WEAVER_WPS_PROVIDERS. Magpie is not responding."
        log INFO -p ""  # finish 'wait' line as applicable
        log ERROR "${PREFIX}Timeout (${WEAVER_WPS_PROVIDERS_MAX_TIME}s)! ${msg}"
        reset_state
        exit 11
    fi
    # wait and retry
    sleep 1
    log INFO -p -n "."  # progress on 'wait' line as applicable
done

# a response was received, but no usable cookie could be extracted from it
if [ -z "${cookies}" ]; then
    log ERROR "${PREFIX}Failed to retrieve authentication token from Magpie for Weaver WPS providers registration."
    reset_state
    exit 12
fi

# validate that Magpie token retrieved is adequate
# (session must be reported as authenticated and must mention the 'administrators' group)
log INFO -n "${PREFIX}Validate Magpie token..."
resp=$(
    curl_cmd --insecure --silent --location \
        -X GET \
        -H "Accept: application/json" \
        -b "${cookies}" \
        -w "\n%{http_code}" \
        -m "${REQUEST_TIMEOUT}" \
        "${MAGPIE_URL}/session"
)
ret=$?   # in case proxy not up yet to receive any request
# the HTTP status code is appended as the last line of the response by the '-w' option
code=$(echo "${resp}" | tail -n -1)
body=$(echo "${resp}" | head -n -1)
auth=$(echo "${body}" | grep -c '"authenticated": true')
admin=$(echo "${body}" | grep -c '"administrators"')
if ! { [ "${ret}" -eq 0 ] && [ "${code}" -eq 200 ] && [ "${auth}" -eq 1 ] && [ "${admin}" -eq 1 ]; }; then
    log ERROR "${PREFIX}Failed administrative validation of Magpie token for Weaver WPS providers registration."
    reset_state
    exit 13
fi
log INFO -p " OK!"

# ------ PHASE 2 ------
# Weaver WPS Providers

# validate that Weaver is ready to receive requests
# (poll its landing page every second until it answers, or until the maximum time is reached)
log INFO -n "${PREFIX}Wait for response from Weaver [${WEAVER_URL}]."
while :; do
    resp=$(
        curl_cmd --insecure --silent --location \
            -X GET \
            -H "Accept: application/json" \
            -b "${cookies}" \
            -w "\n%{http_code}" \
            -m "${REQUEST_TIMEOUT}" \
            "${WEAVER_URL}/"
    )
    ret=$?   # in case proxy not up yet to receive any request
    # last line of the response is the HTTP status code (from '-w'), the rest is the body
    code=$(echo "${resp}" | tail -n -1)
    body=$(echo "${resp}" | head -n -1)
    info=$(echo "${body}" | grep -c '"Weaver Information"')
    if [ "${ret}" -eq 0 ] && [ "${code}" -eq 200 ] && [ "${info}" -ne 0 ]; then
        log INFO -p " OK!"
        break
    fi

    # interrupt if max time reached
    delta_time=$(( $(date -u +%s) - start_time ))
    if [ "${delta_time}" -ge "${WEAVER_WPS_PROVIDERS_MAX_TIME}" ]; then
        msg="Failed to register all providers specified in WEAVER_WPS_PROVIDERS. Weaver is not responding."
        log INFO -p ""  # finish 'wait' line as applicable
        log ERROR "${PREFIX}Timeout (${WEAVER_WPS_PROVIDERS_MAX_TIME}s)! ${msg}"
        reset_state
        exit 21
    fi

    # wait and retry
    sleep 1
    log INFO -p -n "."  # progress on 'wait' line as applicable
done

# move on to actual registration of WPS providers
log INFO "${PREFIX}Using URL: [${WEAVER_URL}]"
# restart the timer: WEAVER_WPS_PROVIDERS_MAX_TIME now bounds the total time spent waiting for providers to respond
start_time="$(date -u +%s)"
for prov in ${WEAVER_WPS_PROVIDERS}; do
    if [ -z "${prov}" ]; then
        continue
    fi
    prov_url="${BIRDHOUSE_PROXY_SCHEME}://${BIRDHOUSE_FQDN_PUBLIC}${TWITCHER_PROTECTED_PATH}/${prov}"
    prov_cap="${prov_url}?service=WPS&request=GetCapabilities"

    # wait for WPS provider to respond
    log INFO -n "${PREFIX}Wait for response from remote WPS provider [${prov}] on [${prov_url}]."
    while true; do
        # request the URL and obtain the body+http code, then split them for verification
        resp=$( \
            curl_cmd --insecure --silent --location \
                 -m "${REQUEST_TIMEOUT}" \
                 -w "\n%{http_code}" \
                 -b "${cookies}" \
                 "${prov_cap}"
        )
        ret=$?   # in case proxy not up yet to receive any request
        code=$(echo "${resp}" | tail -n -1)
        body=$(echo "${resp}" | head -n -1)
        caps=$(echo "${body}" | grep -c "wps:Capabilities")
        # status code compared as a string: an empty or malformed code is simply "not ready"
        if [ "${ret}" -eq 0 ] && [ "${code}" = "200" ] && [ "${caps}" -ne 0 ]; then
            log INFO -p ""  # finish 'wait' line as applicable
            log INFO "${PREFIX}Got response from endpoint of remote WPS provider [${prov}]."
            break
        fi

        # interrupt if max time reached
        next_time=$(date -u +%s)
        delta_time=$(( next_time - start_time ))
        if [ "${delta_time}" -ge "${WEAVER_WPS_PROVIDERS_MAX_TIME}" ]; then
            msg="Failed to register all providers specified in WEAVER_WPS_PROVIDERS: [${prov}] is not responding."
            log INFO -p ""  # finish 'wait' line as applicable
            log ERROR "${PREFIX}Timeout (${WEAVER_WPS_PROVIDERS_MAX_TIME}s)! ${msg}"
            reset_state
            exit 22
        fi
        # wait and retry
        sleep 1
        log INFO -p -n "."  # progress on 'wait' line as applicable
    done

    # (re)register the provider, retrying up to WEAVER_WPS_PROVIDERS_RETRY_COUNT times on failure
    retry=0
    retry_msg=""
    total=${WEAVER_WPS_PROVIDERS_RETRY_COUNT}
    while true; do
        if [ "${retry}" -ne 0 ]; then
            retry_msg=" (retry: ${retry}/${total})"
        fi
        # unregister in case of multiple up/down to regenerate from scratch, don't care if NotFound returned
        log INFO -n "${PREFIX}Unregistering any remote WPS provider matching [${prov}]${retry_msg}... "
        # quoted so that the whole '=> Delete response: <code>' text is passed as a single message
        log INFO -p -n "$( \
            curl_cmd --insecure --silent --location \
                 -m "${REQUEST_TIMEOUT}" \
                 -w "=> Delete response: %{http_code}" -o /dev/null \
                 -b "${cookies}" \
                 -X DELETE \
                 "${WEAVER_URL}/providers/${prov}" \
        )"
        log INFO -p " OK!"  # finish 'unregister' line as applicable

        # register the new provider and validate
        payload="{\"id\": \"${prov}\", \"url\": \"${prov_url}\"}"
        log DEBUG "${PREFIX}Registration payload: ${payload}"
        log DEBUG "${PREFIX}Registration cookies: ${cookies}"
        log INFO -n "${PREFIX}Registering remote WPS provider [${prov}] on [${prov_url}]${retry_msg}... "
        resp=$( \
            curl_cmd --insecure --silent --location \
                 -m "${REQUEST_TIMEOUT}" \
                 -w "\n%{http_code}\n" \
                 -b "${cookies}" \
                 -H "Content-Type: application/json" \
                 -X POST \
                 -d "${payload}" \
                 "${WEAVER_URL}/providers" \
        )
        ret=$?
        code=$(echo "${resp}" | tail -n -1)
        body=$(echo "${resp}" | head -n -1)
        # string comparison on purpose: a numeric test on an empty/non-numeric code returns an error status
        # that would not be considered as a failed registration
        if [ "${ret}" -eq 0 ] && [ "${code}" = "201" ]; then
            log INFO -p " OK!"  # displayed on same line after first registration
            break
        fi

        log INFO -p ""  # finish 'register' line as applicable
        log WARN "${PREFIX}Failed registration of remote WPS provider [${prov}] on [${prov_url}]${retry_msg}."
        log WARN "${PREFIX}Error response content:\n${body}"
        if [ "${retry}" -ge "${total}" ]; then
            log ERROR "${PREFIX}Maximum retry attempts ${total} reached for WPS provider [${prov}]. Aborting."
            reset_state
            exit 23
        fi
        log WARN "${PREFIX}Will retry after ${WEAVER_WPS_PROVIDERS_RETRY_AFTER}s..."
        sleep "${WEAVER_WPS_PROVIDERS_RETRY_AFTER}"
        retry=$((retry+1))
    done
done
log INFO "${PREFIX}All Weaver remote WPS providers registered successfully!"

# Print the provider IDs (whitespace delimited) listed in the "providers" array of the JSON response of a
# Weaver '/providers?detail=false' request, given as first argument.
# Returns non-zero without output when the response has no "providers" field (e.g.: failed request, error response).
weaver_provider_ids() {
    case "$1" in
        *'"providers"'*) ;;
        *) return 1 ;;
    esac
    # flatten on one line, keep only the content of the array, then replace quotes and commas by spaces
    printf '%s\n' "$1" | tr '\n\r\t' '   ' | \
        sed -e 's/.*"providers"[[:space:]]*:[[:space:]]*\[//' -e 's/\].*//' -e 's/[",]/ /g'
}

if [ x"${WEAVER_UNREGISTER_DROPPED_PROVIDERS}" = x"True" ]; then
    # Get all registered providers whether they are working or not
    all_providers_resp=$( \
        curl_cmd --insecure --silent --location \
            -m "${REQUEST_TIMEOUT}" \
            -b "${cookies}" \
            "${WEAVER_URL}/providers?check=false&detail=false" \
    )
    # Get all registered working providers
    working_providers_resp=$( \
        curl_cmd --insecure --silent --location \
            -m "${REQUEST_TIMEOUT}" \
            -b "${cookies}" \
            "${WEAVER_URL}/providers?check=true&detail=false" \
    )
    # Both listings must be valid: without the list of working providers, every provider dropped from
    # WEAVER_WPS_PROVIDERS would be considered as not working and would be unregistered.
    if all_providers=$(weaver_provider_ids "${all_providers_resp}") && \
       working_providers=$(weaver_provider_ids "${working_providers_resp}"); then
        for prov in ${all_providers}; do
            # Note: both WEAVER_WPS_PROVIDERS and working_providers are whitespace delimited with no newlines
            # (literal matching: the provider ID is not interpreted as a pattern)
            case " ${WEAVER_WPS_PROVIDERS} " in
                *" ${prov} "*) continue ;;
            esac
            case " ${working_providers} " in
                *" ${prov} "*) continue ;;
            esac
            # unregister provider that is no longer specified in WEAVER_WPS_PROVIDERS and is no longer working
            log INFO "${PREFIX}Unregistering the remote WPS provider matching [${prov}] not in WEAVER_WPS_PROVIDERS."
            curl_cmd --insecure --silent --location \
                 -m "${REQUEST_TIMEOUT}" \
                 -b "${cookies}" \
                 -X DELETE \
                 "${WEAVER_URL}/providers/${prov}"
        done
    else
        log WARN "${PREFIX}Could not list the registered Weaver providers. Skipping removal of dropped providers."
    fi
fi

log INFO "${PREFIX}Starting Weaver WebApp/Worker Celery tasks validation..."
CUR_SCRIPT_DIR="$(dirname "$(realpath "$0")")"
BIRDHOUSE_COMPOSE="${BIRDHOUSE_COMPOSE:-"$(realpath "${CUR_SCRIPT_DIR}/../../birdhouse-compose.sh")"}"
BIRDHOUSE_LOG_DIR="${BIRDHOUSE_LOG_DIR:-/tmp/birdhouse-compose}"
CELERY_HEALTHCHECK="/opt/local/bin/weaver/celery-healthcheck"
mkdir -p "${BIRDHOUSE_LOG_DIR}"

# Run the Celery healthcheck script in the container of the given service.
# Arguments: $1 - service name passed to '${BIRDHOUSE_COMPOSE} exec'
#            $2 - log file where the healthcheck output is saved
# Outputs:   the healthcheck output, displayed while the command runs
# Returns:   the exit code of the healthcheck command if it failed,
#            1 if the last line of its output contains "ERROR", 0 otherwise
weaver_celery_healthcheck() {
    celery_ret_file="$2.ret"
    rm -f "${celery_ret_file}"
    # note: use 'tee' instead of capturing in variable to allow displaying results directly when running command
    # '$?' after a pipeline is the status of 'tee' (no 'pipefail' in POSIX sh), therefore the status of
    # the healthcheck command itself is saved in a file by the left side of the pipeline
    {
        ${BIRDHOUSE_COMPOSE} exec "$1" bash "${CELERY_HEALTHCHECK}"
        echo "$?" > "${celery_ret_file}"
    } | tee "$2"
    # consider the healthcheck as failed if its status could not be retrieved
    celery_ret=$(cat "${celery_ret_file}" 2>/dev/null)
    celery_ret=${celery_ret:-1}
    rm -f "${celery_ret_file}"
    if [ "${celery_ret}" -ne 0 ]; then
        return "${celery_ret}"
    fi
    if tail -n 1 "$2" | grep -q "ERROR"; then
        return 1
    fi
    return 0
}

weaver_celery_healthcheck weaver "${BIRDHOUSE_LOG_DIR}/weaver.log"
ret_weaver=$?
weaver_celery_healthcheck weaver-worker "${BIRDHOUSE_LOG_DIR}/weaver-worker.log"
ret_worker=$?
if [ "${ret_weaver}" -ne 0 ] || [ "${ret_worker}" -ne 0 ]; then
  log INFO "${PREFIX}Weaver WebApp and/or Worker Celery tasks were not ready. Restarting both..."
  ${BIRDHOUSE_COMPOSE} restart weaver weaver-worker
else
  log INFO "${PREFIX}Weaver WebApp and/or Worker Celery tasks are both ready."
fi

reset_state
