'''
@author: Andrew
'''

import sys
sys.path.append("path to scripts directory")
from utility import git
from nltk.stem import PorterStemmer
# The script takes exactly eight positional arguments; sys.argv[0] is the
# script name, hence the expected length of 9.
if len(sys.argv) != 9:
    # Passing the usage text to sys.exit() writes it to stderr and terminates
    # with exit status 1, so a calling shell or pipeline can detect the misuse
    # (a bare sys.exit() would report success with status 0).
    sys.exit("expecting arguments: (1) a link to the remote repo, (2) a path to the local folder for checking out the remote repo, (3) a file containing keywords for log analysis, (4) a file containing keywords for diff analysis, (5) remove patterns, (6) the path to the analysis output file, (7) use existing repo (True: do not clone, False: clone), (8) diff size limit (be careful, this will ignore commits with huge diffs)")

# Bind positional arguments 1-7 in one step; all of them stay raw strings.
(url,                      # (1) link to the remote repo
 dir,                      # (2) local folder for checking out the remote repo
 logKeywordConfigFile,     # (3) file containing keywords for log analysis
 diffKeywordConfigFile,    # (4) file containing keywords for diff analysis
 removePatternConfigFile,  # (5) file containing remove patterns
 outputPath,               # (6) path to the analysis output file
 useExisting               # (7) "True": do not clone, "False": clone
 ) = sys.argv[1:8]

# (8) diff size limit; int() raises ValueError if the argument is not numeric.
diffSize = int(sys.argv[8])

# Passed to git.findLogsAndDiffs as its skipRevs argument; always empty here.
skipRevs = []

# Stemmer applied to the log keywords when they are loaded.
ps = PorterStemmer()

def _read_stripped_lines(path):
    '''
    Return one entry per line of the file at `path`, with leading and
    trailing whitespace (including the newline) removed.

    NOTE(review): a blank line yields an empty-string entry; whether an empty
    keyword/pattern is harmless depends on git.findLogsAndDiffs -- confirm.
    '''
    with open(path) as handle:
        return [raw.strip() for raw in handle]


# log keywords: one per line, each reduced to its Porter stem
log_keywords = [ps.stem(word) for word in _read_stripped_lines(logKeywordConfigFile)]
print(log_keywords)

# diff keywords: one per line, used as written (no stemming)
diff_keywords = _read_stripped_lines(diffKeywordConfigFile)
print(diff_keywords)

# remove patterns: one per line, used as written
remove_patterns = _read_stripped_lines(removePatternConfigFile)
print(remove_patterns)

# Argument (7) is a literal "True"/"False" string. Reject anything else up
# front, naming the right argument (it is the seventh, not the last one) and
# exiting with a non-zero status; sys.exit(<str>) prints the text to stderr.
if useExisting not in ("True", "False"):
    sys.exit("wrong argument (7) 'use existing repo': expected True or False, got %r" % useExisting)

if useExisting == "False":
    # A fresh checkout was requested: clone first and stop with a failing exit
    # status if that does not succeed, so callers can tell the run did nothing.
    if not git.cloneProject(url, dir):
        sys.exit("clone failed, exit...")
    print("clone successful, starting to analyze repo...")
else:
    # Re-use whatever is already checked out in `dir`.
    print("directly starting analyzing repo...")

# Single analysis call shared by both paths; all inputs come from the
# command-line arguments and the keyword/pattern files loaded above.
git.findLogsAndDiffs(dir, log_keywords, diff_keywords, remove_patterns, outputPath, skipRevs, diffSize)
