-- SPDX-FileCopyrightText: 2021-2022 Davide Rossi <daviderossi@unibo.it>
-- SPDX-FileCopyrightText: 2020 Stefano Zacchiroli <zack@upsilon.cc>
-- SPDX-License-Identifier: GPL-3.0-or-later

-- Import data for analysis into a DB with the schema of schema.sql

-- Explicitly switch per-statement timing on. A bare \timing only toggles the
-- current state, so it would turn timing OFF when it is already enabled
-- (for instance through ~/.psqlrc).
\timing on

-- Load table person: decompress the authors dump and keep only its first three
-- tab-separated columns (cut splits on tabs by default).
\copy person FROM PROGRAM 'zstdcat authors--plausible.csv.zst | cut -f 1-3' WITH (FORMAT csv, DELIMITER E'\t')

-- alternative (disabled): import all fields, including the commit id
-- \copy commit from program 'zstdcat commits--clean.csv.zst | sed s/^/\\\\x/' with (format csv, delimiter ',')

-- Load table commit, author side only. Remarks:
-- 1. SHA1 commit ids are dropped (cut skips field 1): at 20 bytes each over
--    2 B commits they would cost ~37 GB
-- 2. only author information (fields 2-4) is kept; committer information is
--    left out
-- 3. postgres fills the bigserial commit.id on its own; it can be matched
--    against line numbers in commits.csv.zst to recover SHA1 commit IDs
--    when needed
\copy commit (author_date, author_tz, author) FROM PROGRAM 'zstdcat commits.csv.zst | cut -f 2-4 -d,' WITH (FORMAT csv, DELIMITER ',')

-- Load table country_zone from the uncompressed file c_z.tab, relying on the
-- default COPY format (tab-delimited text) since no options are given.
\copy country_zone FROM c_z.tab

-- Load table country_by_email: decompress the dump and keep only
-- tab-separated columns 1 and 4.
\copy country_by_email FROM PROGRAM 'zstdcat author-country-by-email.tab.zst | cut -f 1,4' WITH (FORMAT csv, DELIMITER E'\t')
