#!/usr/bin/perl
#
# Analyze the data into values that are related to our hypotheses

use strict;
use warnings;

use Time::Local;

# Set up symbolic names for the record fields

# Each constant is the zero-based index of a field in the tab-separated
# input record (the array @f produced by split in the main loop).
# NKLUDGE is the last field; the main loop rejects records whose last
# index differs from it.
use constant {
	FILENAME => 0,
	DATE => 1,
	DAYTIME => 2,
	TZ => 3,
# Begin from header.txt.  Generate with
# awk '{print "\t" $2 " => " $1 + 3 ","}' ~/src/cqmetrics/header.txt | tr a-z A-Z
	NCHAR => 4,
	NLINE => 5,
	LINE_LENGTH_MIN => 6,
	LINE_LENGTH_MEAN => 7,
	LINE_LENGTH_MEDIAN => 8,
	LINE_LENGTH_MAX => 9,
	LINE_LENGTH_SD => 10,
	NFUNCTION => 11,
	NSTATEMENT => 12,
	STATEMENT_NESTING_MIN => 13,
	STATEMENT_NESTING_MEAN => 14,
	STATEMENT_NESTING_MEDIAN => 15,
	STATEMENT_NESTING_MAX => 16,
	STATEMENT_NESTING_SD => 17,
	NINTERNAL => 18,
	NCONST => 19,
	NENUM => 20,
	NGOTO => 21,
	NINLINE => 22,
	NNOALIAS => 23,
	NREGISTER => 24,
	NRESTRICT => 25,
	NSIGNED => 26,
	NSTRUCT => 27,
	NUNION => 28,
	NUNSIGNED => 29,
	NVOID => 30,
	NVOLATILE => 31,
	NTYPEDEF => 32,
	NCOMMENT => 33,
	NCOMMENT_CHAR => 34,
	NBOILERPLATE_COMMENT_CHAR => 35,
	NDOX_COMMENT => 36,
	NDOX_COMMENT_CHAR => 37,
	NFUN_COMMENT => 38,
	NCPP_DIRECTIVE => 39,
	NCPP_INCLUDE => 40,
	NCPP_CONDITIONAL => 41,
	NFUN_CPP_DIRECTIVE => 42,
	NFUN_CPP_CONDITIONAL => 43,
	STYLE_INCONSISTENCY => 44,
# NOTE(review): NFUNCTION is also defined above as 11.  With duplicate keys
# in a hash constructor the later value wins, so NFUNCTION is 45 at run
# time and index 11 has no name.  Confirm that $f[NFUNCTION] in the main
# loop is meant to read this column.
	NFUNCTION => 45,
	HALSTEAD_MIN => 46,
	HALSTEAD_MEAN => 47,
	HALSTEAD_MEDIAN => 48,
	HALSTEAD_MAX => 49,
	HALSTEAD_SD => 50,
	NFUNCTION2 => 51,
	CYCLOMATIC_MIN => 52,
	CYCLOMATIC_MEAN => 53,
	CYCLOMATIC_MEDIAN => 54,
	CYCLOMATIC_MAX => 55,
	CYCLOMATIC_SD => 56,
	NIDENTIFIER => 57,
	IDENTIFIER_LENGTH_MIN => 58,
	IDENTIFIER_LENGTH_MEAN => 59,
	IDENTIFIER_LENGTH_MEDIAN => 60,
	IDENTIFIER_LENGTH_MAX => 61,
	IDENTIFIER_LENGTH_SD => 62,
	UNIQUE_NIDENTIFIER => 63,
	UNIQUE_IDENTIFIER_LENGTH_MIN => 64,
	UNIQUE_IDENTIFIER_LENGTH_MEAN => 65,
	UNIQUE_IDENTIFIER_LENGTH_MEDIAN => 66,
	UNIQUE_IDENTIFIER_LENGTH_MAX => 67,
	UNIQUE_IDENTIFIER_LENGTH_SD => 68,
	INDENTATION_SPACING_COUNT => 69,
	INDENTATION_SPACING_MIN => 70,
	INDENTATION_SPACING_MEAN => 71,
	INDENTATION_SPACING_MEDIAN => 72,
	INDENTATION_SPACING_MAX => 73,
	INDENTATION_SPACING_SD => 74,
# Formatting-style counters.  The main loop pairs each NSPACE_* counter
# with its NNO_SPACE_* counterpart when measuring inconsistency.
	NNO_SPACE_AFTER_BINARY_OP => 75,
	NNO_SPACE_AFTER_CLOSING_BRACE => 76,
	NNO_SPACE_AFTER_COMMA => 77,
	NNO_SPACE_AFTER_KEYWORD => 78,
	NNO_SPACE_AFTER_OPENING_BRACE => 79,
	NNO_SPACE_AFTER_SEMICOLON => 80,
	NNO_SPACE_BEFORE_BINARY_OP => 81,
	NNO_SPACE_BEFORE_CLOSING_BRACE => 82,
	NNO_SPACE_BEFORE_KEYWORD => 83,
	NNO_SPACE_BEFORE_OPENING_BRACE => 84,
	NSPACE_AFTER_OPENING_SQUARE_BRACKET => 85,
	NSPACE_AFTER_STRUCT_OP => 86,
	NSPACE_AFTER_UNARY_OP => 87,
	NSPACE_AT_END_OF_LINE => 88,
	NSPACE_BEFORE_CLOSING_BRACKET => 89,
	NSPACE_BEFORE_CLOSING_SQUARE_BRACKET => 90,
	NSPACE_BEFORE_COMMA => 91,
	NSPACE_BEFORE_OPENING_SQUARE_BRACKET => 92,
	NSPACE_BEFORE_SEMICOLON => 93,
	NSPACE_BEFORE_STRUCT_OP => 94,
	NSPACE_AFTER_BINARY_OP => 95,
	NSPACE_AFTER_CLOSING_BRACE => 96,
	NSPACE_AFTER_COMMA => 97,
	NSPACE_AFTER_KEYWORD => 98,
	NSPACE_AFTER_OPENING_BRACE => 99,
	NSPACE_AFTER_SEMICOLON => 100,
	NNO_SPACE_AFTER_STRUCT_OP => 101,
	NSPACE_BEFORE_BINARY_OP => 102,
	NSPACE_BEFORE_CLOSING_BRACE => 103,
	NSPACE_BEFORE_KEYWORD => 104,
	NSPACE_BEFORE_OPENING_BRACE => 105,
	NNO_SPACE_BEFORE_STRUCT_OP => 106,
	NNO_SPACE_AFTER_OPENING_SQUARE_BRACKET => 107,
	NNO_SPACE_AFTER_UNARY_OP => 108,
	NNO_SPACE_BEFORE_CLOSING_BRACKET => 109,
	NNO_SPACE_BEFORE_CLOSING_SQUARE_BRACKET => 110,
	NNO_SPACE_BEFORE_COMMA => 111,
	NNO_SPACE_BEFORE_OPENING_SQUARE_BRACKET => 112,
	NNO_SPACE_BEFORE_SEMICOLON => 113,
# End from header.txt
	NKLUDGE => 114,
};

# Input fields
# Fields of the input record currently being processed; filled by the main
# loop and read by ia() through the field-index constants.
my @f;

# Read the records of all files in a snapshot and produce the composite values
# All accumulators below are sums over every record of the snapshot and
# start at zero.
my $nfile = 0;
# Sum of the per-file dates as epoch seconds; divided by $nfile when printed
my $time = 0;
# Denominators
my $nstatement = 0;
my $nline = 0;
my $nfunction = 0;
my $nconsistency_cases = 0;
my $nchar = 0;
my $indentation_count = 0;
my $sline_length_median = 0;

# Numerators
my $nregister = 0;
my $nrestrict = 0;
my $nenum = 0;
my $nvoid = 0;
my $nsigned = 0;
my $nunsigned = 0;
my $nconst = 0;
my $nvolatile = 0;
my $ninline = 0;
my $nnoalias = 0;
my $ngoto = 0;
my $ncpp_include = 0;
my $ncpp_noinclude = 0;
my $ncpp_conditional = 0;
my $ninternal = 0;
my $ncomment = 0;
my $ncomment_char = 0;
my $indentation_q = 0;
# Initialised like every other accumulator, so that it is still a defined
# number when no record carries an indentation mean.
my $indentation = 0;
my $sindentation_median = 0;
my $nkludge = 0;
my $nidentifier = 0;
my $ninconsistent_cases = 0;
my $nidentifier_length = 0;
my $sidentifier_length_median = 0;
my $nesting = 0;
my $snesting_median = 0;

# Fold one pair of opposing formatting counters of the current record
# into the snapshot-wide totals.
# Arguments: the @f indices of the two counters of the pair.
# The sum of both counters is added to $nconsistency_cases and the
# smaller of the two is added to $ninconsistent_cases.
sub ia
{
	my ($first_index, $second_index) = @_;
	my ($first_count, $second_count) = @f[$first_index, $second_index];

	$nconsistency_cases += $first_count + $second_count;
	my $minority = ($first_count > $second_count) ? $second_count : $first_count;
	$ninconsistent_cases += $minority;
}

# Return the square of the single numeric argument.
sub sqr
{
	my $value = shift;
	return $value * $value;
}

# Read one tab-separated record per input line (one record per source
# file) and add its values to the snapshot-wide accumulators.
# Exits with status 1 on the first record with an unexpected field count.
while (my $line = <>) {
	# chomp strips only a trailing newline; chop would also remove the
	# last digit of the final field on an unterminated last line.
	chomp $line;
	@f = split(/\t/, $line);
	# NKLUDGE is the index of the last expected field.  split drops
	# trailing empty fields, so a record ending in empty fields is
	# rejected here too.
	if ($#f != NKLUDGE) {
		printf STDERR "Line %d: found %d fields, expected %d\n", $., scalar(@f), NKLUDGE + 1;
		# Make the tabs visible in the echoed record
		$line =~ s/\t/\\t/g;
		print STDERR "$line\n";
		exit 1;
	}
	$nfile++;

	# Sum the file dates as midnight UTC epochs; the mean is derived later.
	# timegm expects a zero-based month.
	my ($year, $month, $mday) = split(/-/, $f[DATE]);
	$time += timegm(0, 0, 0, $mday, $month - 1, $year);

	# Size denominators
	$nstatement += $f[NSTATEMENT];
	$nline += $f[NLINE];
	$nfunction += $f[NFUNCTION];
	$nchar += $f[NCHAR];
	# Each optional statistic is guarded by the field that is actually
	# added, so that an empty value is skipped rather than summed.
	$sline_length_median += $f[LINE_LENGTH_MEDIAN] unless ($f[LINE_LENGTH_MEDIAN] eq '');

	# Keyword, preprocessor and comment counts
	$nregister += $f[NREGISTER];
	$nrestrict += $f[NRESTRICT];
	$nenum += $f[NENUM];
	$nvoid += $f[NVOID];
	$nsigned += $f[NSIGNED];
	$nunsigned += $f[NUNSIGNED];
	$nconst += $f[NCONST];
	$nvolatile += $f[NVOLATILE];
	$ninline += $f[NINLINE];
	$nnoalias += $f[NNOALIAS];
	$ngoto += $f[NGOTO];
	$ncpp_include += $f[NCPP_INCLUDE];
	$ncpp_noinclude += $f[NCPP_DIRECTIVE] - $f[NCPP_INCLUDE];
	$ncpp_conditional += $f[NCPP_CONDITIONAL];
	$ninternal += $f[NINTERNAL];
	$ncomment += $f[NCOMMENT];
	# Boilerplate comment characters are excluded from the comment size
	$ncomment_char += $f[NCOMMENT_CHAR] - $f[NBOILERPLATE_COMMENT_CHAR];
	$nkludge += $f[NKLUDGE];

	# Turn the per-file mean back into a total identifier length
	$nidentifier_length += $f[IDENTIFIER_LENGTH_MEAN] * $f[NIDENTIFIER] unless ($f[IDENTIFIER_LENGTH_MEAN] eq '');
	$sidentifier_length_median += $f[IDENTIFIER_LENGTH_MEDIAN] unless ($f[IDENTIFIER_LENGTH_MEDIAN] eq '');
	$nidentifier += $f[NIDENTIFIER];

	# Unassemble indentation: weight the per-file variance and mean by
	# the number of indentation samples they were computed from.
	$indentation_q += sqr($f[INDENTATION_SPACING_SD]) * $f[INDENTATION_SPACING_COUNT] unless ($f[INDENTATION_SPACING_SD] eq '');
	$indentation += $f[INDENTATION_SPACING_MEAN] * $f[INDENTATION_SPACING_COUNT] unless ($f[INDENTATION_SPACING_MEAN] eq '');
	$sindentation_median += $f[INDENTATION_SPACING_MEDIAN] unless ($f[INDENTATION_SPACING_MEDIAN] eq '');
	$indentation_count += $f[INDENTATION_SPACING_COUNT];
	# Statement nesting, weighted by the file's statement count
	$nesting += $f[STATEMENT_NESTING_MEAN] * $f[NSTATEMENT] unless ($f[STATEMENT_NESTING_MEAN] eq '');
	$snesting_median += $f[STATEMENT_NESTING_MEDIAN] unless ($f[STATEMENT_NESTING_MEDIAN] eq '');

	# Formatting consistency: each call pairs a "space" counter with its
	# "no space" counterpart.
	ia(NSPACE_AFTER_BINARY_OP, NNO_SPACE_AFTER_BINARY_OP);
	ia(NSPACE_AFTER_CLOSING_BRACE, NNO_SPACE_AFTER_CLOSING_BRACE);
	ia(NSPACE_AFTER_COMMA, NNO_SPACE_AFTER_COMMA);
	ia(NSPACE_AFTER_KEYWORD, NNO_SPACE_AFTER_KEYWORD);
	ia(NSPACE_AFTER_OPENING_BRACE, NNO_SPACE_AFTER_OPENING_BRACE);
	ia(NSPACE_AFTER_SEMICOLON, NNO_SPACE_AFTER_SEMICOLON);
	ia(NSPACE_AFTER_STRUCT_OP, NNO_SPACE_AFTER_STRUCT_OP);
	ia(NSPACE_BEFORE_BINARY_OP, NNO_SPACE_BEFORE_BINARY_OP);
	ia(NSPACE_BEFORE_CLOSING_BRACE, NNO_SPACE_BEFORE_CLOSING_BRACE);
	ia(NSPACE_BEFORE_KEYWORD, NNO_SPACE_BEFORE_KEYWORD);
	ia(NSPACE_BEFORE_OPENING_BRACE, NNO_SPACE_BEFORE_OPENING_BRACE);
	ia(NSPACE_BEFORE_STRUCT_OP, NNO_SPACE_BEFORE_STRUCT_OP);
	ia(NSPACE_AFTER_OPENING_SQUARE_BRACKET, NNO_SPACE_AFTER_OPENING_SQUARE_BRACKET);
	ia(NSPACE_AFTER_UNARY_OP, NNO_SPACE_AFTER_UNARY_OP);
	ia(NSPACE_BEFORE_CLOSING_BRACKET, NNO_SPACE_BEFORE_CLOSING_BRACKET);
	ia(NSPACE_BEFORE_CLOSING_SQUARE_BRACKET, NNO_SPACE_BEFORE_CLOSING_SQUARE_BRACKET);
	ia(NSPACE_BEFORE_COMMA, NNO_SPACE_BEFORE_COMMA);
	ia(NSPACE_BEFORE_OPENING_SQUARE_BRACKET, NNO_SPACE_BEFORE_OPENING_SQUARE_BRACKET);
	ia(NSPACE_BEFORE_SEMICOLON, NNO_SPACE_BEFORE_SEMICOLON);
}

# Print the results: a single tab-separated record of snapshot-level values

# Without any input there is nothing to average; report that explicitly
# instead of failing with "Illegal division by zero" on the first mean.
if ($nfile == 0) {
	print STDERR "No input records\n";
	exit 1;
}

# The file dates were accumulated with timegm(), so the mean epoch has to
# be split with gmtime(); localtime() would shift the reported date by the
# local UTC offset.
my ($mday, $mon, $year) = (gmtime($time / $nfile))[3, 4, 5];
$mon++;
$year += 1900;

# Separate output fields by tab and terminate the record with a newline
$, = "\t";
$\ = "\n";

# NOTE(review): the remaining denominators (such as $nfunction, $ncomment
# and $nconsistency_cases) are assumed to be non-zero for a non-empty
# snapshot -- confirm for snapshots without functions or comments.
# NOTE(review): SDindentation divides the root of the count-weighted
# variance sum by the count; a pooled standard deviation would be
# sqrt($indentation_q / $indentation_count) -- confirm which is intended.
print
	int($time / $nfile),		# Epoch: Unix epoch time
	sprintf("%04d-%02d-%02d", $year, $mon, $mday),	# Date: ISO date
	$nstatement,			# Nstatement: Number of statements
	$nline,				# Nline: Number of lines
	$nfunction,			# Nfunction: Number of functions
	$nstatement / $nline,		# Dstatement: Statement density
	$nchar / $nline,		# MlineLen: Mean line length (characters)
	$sline_length_median / $nfile,	# AMlineLen: Average median line length (characters)
	$nline / $nfile,		# MfileLen: Mean file length (lines)
	$nstatement / $nfile,		# MfileStatement: Mean file functionality (stmts)
	$nline / $nfunction,		# MfunLen: Mean function length (lines)
	$nregister / $nstatement,	# Dregister: register keyword density
	$nrestrict / $nstatement,	# Drestrict: restrict keyword density
	$nenum / $nstatement,		# Denum: enum keyword density
	$nvoid / $nstatement,		# Dvoid: void keyword density
	$nsigned / $nstatement,		# Dsigned: signed keyword density
	$nunsigned / $nstatement,	# Dunsigned: unsigned keyword density
	$nconst / $nstatement,		# Dconst: const keyword density
	$nvolatile / $nstatement,	# Dvolatile: volatile keyword density
	$ninline / $nstatement,		# Dinline: inline keyword density
	$nnoalias / $nstatement,	# Dnoalias: noalias keyword density
	$ngoto / $nstatement,		# Dgoto: goto keyword density
	$ncpp_include / $nline,		# DcppInclude: C preprocessor include statement density
	$ncpp_noinclude / $nline,	# DcppNoInclude: C preprocessor non-include statement density
	$ncpp_conditional / $nline,	# CcppConditional: C preprocessor conditional density
	$ninternal / $nstatement,	# Dinternal: Internally visible declaration density
	$ncomment / $nline,		# Dcomment: Comment density
	$ncomment_char / $nchar,	# DcommentChar: Comment character density
	$ncomment_char / $ncomment,	# McommentSize: Mean comment size
	$nkludge / $nline,		# Dkludge: Kludge word density
	$nesting / $nstatement,		# Mnesting: Mean statement nesting
	$snesting_median / $nfile,	# AMnesting: Average median statement nesting
	$nidentifier_length / $nidentifier,		# MidentifierLength: Mean identifier length
	$sidentifier_length_median / $nfile,		# AMidentifierLength: Average median identifier length
	$indentation / $indentation_count,		# MindentationSpace: Mean indentation spaces
	$sindentation_median / $nfile,			# AMindentationSpace: Average median indentation spaces
	$ninconsistent_cases / $nconsistency_cases,	# Inconsistency: Formatting inconsistency
	sqrt($indentation_q) / $indentation_count;	# SDindentation: Indentation standard deviation
