WITH OAX_AUTHORS AS (
    --- Distinct (doi, year, country) combinations for OpenAlex works of type
    --- 'article' that have a DOI and were published in 2012-2022.
    --- Both UNNESTs are LEFT JOINs, so works without authorships or
    --- institutions are kept with a NULL country.
    SELECT DISTINCT
        --- Keep the DOI from character 17 onwards (presumably this strips a
        --- "https://doi.org/" prefix -- TODO confirm), then trim and upper-case
        --- it so it can be joined to the equally normalised Unpaywall DOI.
        UPPER(TRIM(SUBSTRING(doi, 17))) AS doi,
        publication_year AS year,
        ---/ corresponding authors not included in reported analysis /
        ---IF(ARRAY_LENGTH(corresponding_institution_ids) > 0, TRUE, FALSE) as has_corresponding_inst,
        ---authors.is_corresponding as author_is_corresponding,
        institution.country_code AS country
    FROM `openalex.works_snapshot20230709` AS works
    LEFT JOIN UNNEST(authorships) AS authors
    --- presumably the institutions array of the current authorship -- verify
    LEFT JOIN UNNEST(institutions) AS institution
    WHERE
        publication_year BETWEEN 2012 AND 2022
        AND works.type = 'article'
        AND doi IS NOT NULL
),


OAX_COUNTRY AS (
    --- One row per (doi, year, country), restricted to the 30 country codes
    --- listed below. The GROUP BY carries no active aggregates; it is kept so
    --- the commented-out corresponding-author aggregates can be re-enabled.
    SELECT
        doi,
        year,
        country
        ---CASE WHEN countif(has_corresponding_inst) > 0 THEN TRUE ELSE FALSE END as has_corresponding_inst,
        ---CASE WHEN countif(author_is_corresponding) > 0 THEN TRUE ELSE FALSE END as is_corresponding,
    FROM OAX_AUTHORS
    WHERE country IN (
        "AT", "BE", "BG", "CH", "CY", "CZ", "DE", "DK", "EE", "ES",
        "FI", "FR", "GB", "GR", "HR", "HU", "IE", "IT", "LT", "LU",
        "LV", "MT", "NL", "NO", "PL", "PT", "RO", "SE", "SI", "SK"
    )
    GROUP BY
        doi,
        year,
        country
),

---/ create a truth table of APC status (TRUE/FALSE) for all ISSN-Ls in Unpaywall (UPW) /
DOAJ AS (
    --- Print ISSN, electronic ISSN, title and APC flag of every journal in
    --- the DOAJ table, renamed with a doaj_ prefix.
    SELECT
        Journal_ISSN__print_version_ AS doaj_issn,
        Journal_EISSN__online_version_ AS doaj_eissn,
        Journal_title AS doaj_journal_title,
        APC  ---/ verified that APC is either T or F (no nulls) /
    FROM `doaj.doaj_20230830`
),

UPW_DOAJ AS (
    --- ISSN information of Unpaywall rows whose journal is flagged as being
    --- in DOAJ. Rows are not de-duplicated here.
    SELECT
        journal_issns AS upw_issns,  --- comma-separated string
        journal_issn_l AS upw_issnl
    FROM `unpaywall.unpaywall_snapshot20230827`
    WHERE journal_is_in_doaj = TRUE
),


UPW_DOAJ_JOIN AS (
    --- Attach the DOAJ APC flag to each Unpaywall ISSN-L.
    --- A DOAJ journal matches when its print ISSN or E-ISSN equals the
    --- ISSN-L, or is one of the ISSNs listed in the comma-separated
    --- upw_issns string. SELECT DISTINCT collapses repeated
    --- (upw_issnl, APC) pairs.
    SELECT DISTINCT
        ---/ verified that all issn-l's occur only once in TABLE_JOIN /
        --- NOTE(review): that check predates the list-membership match
        --- below, which can match more journals -- re-verify uniqueness.
        UPW_DOAJ.upw_issnl,
        DOAJ.APC
    FROM UPW_DOAJ
    INNER JOIN DOAJ
        ON DOAJ.doaj_issn = UPW_DOAJ.upw_issnl
        OR DOAJ.doaj_eissn = UPW_DOAJ.upw_issnl
        --- upw_issns is ONE comma-separated string, so `x IN (upw_issns)`
        --- was a plain equality test against the whole string and never
        --- matched journals with several ISSNs. Split it into its elements
        --- (dropping any blanks around the commas) and test membership.
        OR DOAJ.doaj_issn IN UNNEST(SPLIT(REPLACE(UPW_DOAJ.upw_issns, ' ', ''), ','))
        OR DOAJ.doaj_eissn IN UNNEST(SPLIT(REPLACE(UPW_DOAJ.upw_issns, ' ', ''), ','))
),

UPW_ISSN_TRUTHTABLE AS (
    --- Per ISSN-L: has_apc is TRUE only when the DOAJ APC flag is TRUE;
    --- FALSE and NULL both become FALSE.
    SELECT
        upw_issnl,
        (APC IS TRUE) AS has_apc
    FROM UPW_DOAJ_JOIN
),

-----

UPW_TRUTHTABLE AS (
    --- One row per Unpaywall row: normalised DOI, ISSN-L, is_oa and a set of
    --- boolean flags derived from oa_status, journal_is_in_doaj and the
    --- repository copies found in oa_locations.
    --- Every flag is wrapped in IS TRUE so that a NULL comparison yields
    --- FALSE instead of NULL.
    SELECT
        UPPER(TRIM(doi)) AS doi,
        journal_issn_l AS upw_issnl,
        is_oa,
        (oa_status = "gold") IS TRUE AS gold,
        (oa_status = "gold" AND journal_is_in_doaj = FALSE) IS TRUE AS gold_non_doaj,
        (oa_status = "gold" AND journal_is_in_doaj = TRUE) IS TRUE AS gold_doaj,
        (oa_status = "hybrid") IS TRUE AS hybrid,
        (oa_status = "bronze") IS TRUE AS all_bronze,
        (oa_status = "bronze" AND NOT has_repo_accpub) IS TRUE AS bronze_no_green_accpub,
        (oa_status = "bronze" AND has_repo_accpub) IS TRUE AS bronze_green_accpub,
        (oa_status = "green" AND has_repo_accpub) IS TRUE AS green_only_accpub,
        (oa_status = "green" AND has_repo_preprint) IS TRUE AS green_only_preprint,
        (oa_status = "green" AND has_repo_any) IS TRUE AS green_only_all,
        (oa_status = "closed") IS TRUE AS closed
    FROM (
        --- Compute the three repository-copy tests once per row so the
        --- flags above can share them.
        SELECT
            doi,
            journal_issn_l,
            is_oa,
            oa_status,
            journal_is_in_doaj,
            --- at least one repository copy that is an accepted or published version
            EXISTS (
                SELECT 1
                FROM UNNEST(oa_locations) AS location
                WHERE location.host_type = "repository"
                    AND location.version IN ("acceptedVersion", "publishedVersion")
            ) AS has_repo_accpub,
            --- at least one repository copy that is a submitted version
            EXISTS (
                SELECT 1
                FROM UNNEST(oa_locations) AS location
                WHERE location.host_type = "repository"
                    AND location.version IN ("submittedVersion")
            ) AS has_repo_preprint,
            --- at least one repository copy of any version
            EXISTS (
                SELECT 1
                FROM UNNEST(oa_locations) AS location
                WHERE location.host_type = "repository"
            ) AS has_repo_any
        FROM `unpaywall.unpaywall_snapshot20230827`
    )
),


UPW_TRUTHTABLE_2 AS (
    --- Unpaywall flags enriched with has_apc. The LEFT JOIN keeps every
    --- Unpaywall row; has_apc is NULL when the ISSN-L has no entry in
    --- UPW_ISSN_TRUTHTABLE.
    SELECT *
    FROM UPW_TRUTHTABLE AS upw
    LEFT JOIN UPW_ISSN_TRUTHTABLE AS apc
        USING (upw_issnl)
),


COUNTRY_TRUTHTABLE AS (
    --- Every OpenAlex (doi, year, country) row with the Unpaywall flags for
    --- the same DOI attached. The LEFT JOIN keeps DOIs that have no
    --- Unpaywall match; their flag columns are NULL.
    SELECT *
    FROM OAX_COUNTRY AS oax
    LEFT JOIN UPW_TRUTHTABLE_2 AS upw
        USING (doi)
),

TABLE_AGGREGATE AS (
    --- Distinct-DOI counts per country and publication year for each
    --- open-access category. A CASE without ELSE yields NULL, which
    --- COUNT(DISTINCT ...) ignores.
    SELECT
        country,
        year,
        --- DOIs with a non-NULL is_oa (presumably those matched in Unpaywall -- verify)
        COUNT(DISTINCT CASE WHEN is_oa IS NOT NULL THEN doi END) AS dois,
        COUNT(DISTINCT CASE WHEN is_oa THEN doi END) AS num_oa,
        COUNT(DISTINCT CASE WHEN gold_doaj AND has_apc = FALSE THEN doi END) AS num_gold_doaj_non_apc,
        --- DOAJ gold minus the non-APC part, so a NULL has_apc counts as APC
        COUNT(DISTINCT CASE WHEN gold_doaj THEN doi END)
            - COUNT(DISTINCT CASE WHEN gold_doaj AND has_apc = FALSE THEN doi END) AS num_gold_doaj_apc,
        COUNT(DISTINCT CASE WHEN gold_non_doaj THEN doi END) AS num_gold_non_doaj,
        COUNT(DISTINCT CASE WHEN hybrid THEN doi END) AS num_hybrid,
        COUNT(DISTINCT CASE WHEN bronze_no_green_accpub THEN doi END) AS num_bronze_no_green_accpub,
        COUNT(DISTINCT CASE WHEN bronze_green_accpub THEN doi END) AS num_bronze_green_accpub,
        COUNT(DISTINCT CASE WHEN green_only_accpub THEN doi END) AS num_green_only_accpub,
        COUNT(DISTINCT CASE WHEN green_only_preprint THEN doi END) AS num_green_only_preprint,
        --- closed + all green - green accepted/published - green preprint.
        --- NOTE(review): a green DOI that has both an accepted/published and
        --- a submitted repository copy is counted in both green columns and
        --- is therefore subtracted twice here -- confirm this is intended.
        COUNT(DISTINCT CASE WHEN closed THEN doi END)
            + COUNT(DISTINCT CASE WHEN green_only_all THEN doi END)
            - COUNT(DISTINCT CASE WHEN green_only_accpub THEN doi END)
            - COUNT(DISTINCT CASE WHEN green_only_preprint THEN doi END) AS num_closed
    FROM COUNTRY_TRUTHTABLE
    ---/ corresponding authors not included in reported analysis /
    ---WHERE has_corresponding_inst = TRUE AND is_corresponding = TRUE
    GROUP BY
        country,
        year
    --- NOTE(review): ordering inside a CTE is presumably not guaranteed to
    --- carry over to a query that selects from it -- verify before relying on it.
    ORDER BY
        country,
        year DESC
)

--- Final output: all columns of COUNTRY_TRUTHTABLE (the OAX_COUNTRY rows with
--- the Unpaywall flags attached).
--- NOTE(review): TABLE_AGGREGATE is defined above but is not selected here --
--- confirm that the row-level table, not the aggregate, is the intended result.
SELECT * FROM COUNTRY_TRUTHTABLE