--- Status: 20.07.2017 --- check which commits to the default branch triggered a build SELECT c.project as project, c.branch as branch, c.hash_value as hash_value, CASE WHEN t.git_trigger_commit IS NULL THEN FALSE ELSE TRUE END as build_triggered FROM `project_sample_default_branch.tr_sample_commits_default_branch_during_ci` c LEFT JOIN `travistorrent.travistorrent_11_1_2017` t ON c.project = REPLACE(t.gh_project_name, "/", "_") AND c.branch = REPLACE(t.git_branch, "/", "_") and c.hash_value = t.git_trigger_commit GROUP BY project, branch, hash_value, build_triggered; => project_sample_default_branch.tr_sample_commits_default_branch_during_ci_triggered_build SELECT COUNT(*) FROM `project_sample_default_branch.tr_sample_commits_default_branch_during_ci_triggered_build` WHERE build_triggered = true; => 26,911 SELECT COUNT(*) FROM `project_sample_default_branch.tr_sample_commits_default_branch_during_ci_triggered_build` WHERE build_triggered = false; => 44,839 --- OBSERVATION: Only 39,98% of the commits to the default branch triggered a build (many of them merged into the default branch?). --- check which merges into the default branch triggered a build SELECT c.project as project, c.branch as branch, c.hash_value as hash_value, CASE WHEN t.git_trigger_commit IS NULL THEN FALSE ELSE TRUE END as build_triggered FROM `project_sample_default_branch.tr_sample_merges_default_branch_during_ci` c LEFT JOIN `travistorrent.travistorrent_11_1_2017` t ON c.project = REPLACE(t.gh_project_name, "/", "_") AND c.branch = REPLACE(t.git_branch, "/", "_") and c.hash_value = t.git_trigger_commit GROUP BY project, branch, hash_value, build_triggered; => project_sample_default_branch.tr_sample_merges_default_branch_during_ci_triggered_build SELECT COUNT(*) FROM `msr-mining-challenge.project_sample_default_branch.tr_sample_merges_default_branch_during_ci_triggered_build` WHERE build_triggered = true; => 8,904 SELECT COUNT(*) FROM `msr-mining-challenge.project_sample_default_branch.tr_sample_merges_default_branch_during_ci_triggered_build` WHERE build_triggered = false; => 1,845 --- OBSERVATION: 79,28% of the merges into the default branch triggered a build (rest are merged merges?). --- get build timestamps for each project in the sample to derive mean time frame between builds SELECT s.gh_project_name as gh_project_name, git_branch, tr_build_id, gh_build_started_at FROM `travistorrent.tr_projects_sample` s JOIN `travistorrent.travistorrent_11_1_2017` t ON s.gh_project_name = t.gh_project_name GROUP BY gh_project_name, git_branch, tr_build_id, gh_build_started_at ORDER BY gh_build_started_at; => travistorrent.tr_projects_sample_builds --- filter builds of default branch SELECT gh_project_name, git_branch, tr_build_id, gh_build_started_at FROM `travistorrent.tr_projects_sample_builds` builds INNER JOIN ( SELECT project, branch FROM `project_sample.tr_sample_branches` WHERE default_branch = true ) as default_branch ON REPLACE(builds.gh_project_name, "/", "_") = default_branch.project AND REPLACE(builds.git_branch, "/", "_") = default_branch.branch; => project_sample_default_branch.tr_projects_sample_builds_default_branch --- filter out excluded projects SELECT gh_project_name, git_branch, tr_build_id, gh_build_started_at FROM `travistorrent.tr_projects_sample_builds` builds LEFT JOIN ( SELECT DISTINCT project FROM `project_sample_default_branch.tr_sample_default_branch_activity_filtered` ) AS filtered_projects ON REPLACE(builds.gh_project_name, "/", "_") = filtered_projects.project WHERE filtered_projects.project is not null; => travistorrent.tr_projects_sample_builds_filtered --- filter out excluded projects and only consider default branch SELECT gh_project_name, git_branch, tr_build_id, gh_build_started_at FROM `project_sample_default_branch.tr_projects_sample_builds_default_branch` as builds LEFT JOIN ( SELECT DISTINCT project FROM `project_sample_default_branch.tr_sample_default_branch_activity_filtered` ) AS filtered_projects ON REPLACE(builds.gh_project_name, "/", "_") = filtered_projects.project WHERE filtered_projects.project is not null; => project_sample_default_branch.tr_projects_sample_builds_default_branch_filtered --- It seems like travistorrent contains more branches (tags?) as the data we retrieved... SELECT git_branch FROM `msr-mining-challenge.travistorrent.travistorrent_11_1_2017` WHERE gh_project_name="rails/rails" GROUP BY git_branch; --- OBSERVATION: travistorrent contains many (old?) branches (tags?)