-- SPDX-FileCopyrightText: 2021-2022 Davide Rossi <daviderossi@unibo.it>
-- SPDX-FileCopyrightText: 2020-2021 Stefano Zacchiroli <zack@upsilon.cc>
-- SPDX-License-Identifier: GPL-3.0-or-later

-- Postgres schema for a simple DB to be used for data analysis of the zones
-- of commit authors over time. Supports the commit/author/committer sub-part
-- of the Software Heritage DB.

-- Remove objects left over from a previous run so that this script can be
-- re-applied from scratch. Every table created further down needs a matching
-- drop here, otherwise the corresponding "create table" fails on a re-run.
drop table if exists country_by_email;
drop table if exists person;
drop table if exists commit;
drop table if exists country_zone;
drop table if exists commit_zone;
-- The creation of the sha1 domain is currently commented out in this script;
-- the drop is kept so that a domain left behind by an earlier schema version
-- is still cleaned up.
drop domain if exists sha1;


-- convert a pair UTC timestamp/tz offset to a local time without timezone
--
-- ts: absolute time as a UTC timestamp
-- tz: time offset w.r.t. UTC in minutes (negative for zones west of UTC)
--
-- return: local time in the given time zone; NULL when either argument is
--         NULL (the function is declared strict)
create or replace function timestamp_to_localtime(ts timestamptz, tz integer)
  returns timestamp
as $$
  -- Pin the timestamptz -> timestamp conversion to UTC *before* applying the
  -- offset. A bare "::timestamp" cast would instead convert using the
  -- session's TimeZone setting, so the result would silently shift with the
  -- session configuration and the "immutable" declaration below would not
  -- hold.
  select (ts at time zone 'UTC') + make_interval(mins => tz)
$$ language sql strict immutable;

-- create domain sha1 as bytea check (length(value) = 20);

create table commit (
  -- Synthetic key, cross referencing line numbers in the CSV input data
  -- (commits.csv.zst). The sha1 commit identifier is deliberately not
  -- stored: it is not needed at present and takes a lot of space.
  id           bigserial                 not null,

  -- Author timestamp (UTC).
  author_date  timestamp with time zone  not null,

  -- Author timezone offset with respect to UTC, in minutes.
  author_tz    smallint                  not null,

  -- Author ID, referencing person.id (no foreign key constraint is declared).
  author       bigint                    not null

  -- The committer counterparts of the author columns (committer_date
  -- timestamptz, committer_tz smallint, committer bigint referencing
  -- person.id) are intentionally left out: the analysis focuses on authors.
);

-- One row per person. Other tables point at person.id (commit.author and
-- country_by_email.person); no key or uniqueness constraint is declared here.
create table person (
  -- person identifier
  id        bigint  not null,

  -- presumably the person's name as recorded in commits -- TODO confirm
  fullname  text    not null,

  -- optional: this is the only nullable column of the table
  email     text
);

-- Associates a person with a country.
-- NOTE(review): judging by the table name, the country is presumably the one
-- derived from the person's email address -- confirm against the loader.
create table country_by_email (
  -- references person(id); no foreign key constraint is declared
  person    bigint  not null,

  country   text    not null
);

-- Lookup table connecting countries, as detected from the ccTLD of email
-- addresses, to their world zones. Each country appears at most once.
create table country_zone (
  country  text,
  zone     text  not null,

  primary key (country)
);

-- Associates a commit with a zone.
-- NOTE(review): "commit" presumably holds commit.id values and "zone" values
-- presumably match country_zone.zone -- neither is enforced by a constraint;
-- confirm against the code that populates this table.
create table commit_zone (
  commit  bigint  not null,
  zone    text    not null
);
