#!/usr/bin/env python3
#

"""Change text corpus pair to character token format with _ spaces."""

import json
import sys
from sys import argv

def parse_line(line):
    """Split one corpus line into its ``(error, correct)`` texts.

    The line is stripped of surrounding whitespace and split on tabs.  The
    first field is the erroneous text and the second field, when present, is
    the corrected text.  A line without a tab is treated as already correct,
    so the same text is returned for both.  Fields after the second are
    ignored.
    """
    fields = line.strip().split("\t")
    error = fields[0]
    correct = fields[1] if len(fields) > 1 else error
    return error, correct


def write_records(lines, out):
    """Write one JSON object per corpus line to *out* as a JSON array.

    Each object has a ``"correct"`` string and an ``"error"`` array holding
    the whitespace-separated words of the erroneous text.  The tab-indented
    layout of the output is kept, but separators are only written *between*
    items (no trailing commas) and every string goes through ``json.dumps``
    so quotes, backslashes and control characters are escaped.  The result
    is therefore always parseable JSON, including for empty input (``[]``).

    *lines* is any iterable of text lines; *out* is a writable text stream.
    Records are streamed, so the corpus is never held in memory as a whole.
    """
    out.write("[\n")
    wrote_any = False
    for line in lines:
        error, correct = parse_line(line)
        # The separator goes before every record except the first, which
        # avoids the trailing comma that JSON forbids.
        if wrote_any:
            out.write(",\n")
        wrote_any = True
        words = ",\n".join(
            "\t\t\t" + json.dumps(word, ensure_ascii=False)
            for word in error.split()
        )
        out.write("\t{\n")
        out.write('\t\t"correct": ' + json.dumps(correct, ensure_ascii=False)
                  + ",\n")
        out.write('\t\t"error": [\n')
        if words:
            out.write(words + "\n")
        out.write("\t\t]\n")
        out.write("\t}")
    if wrote_any:
        out.write("\n")
    out.write("]\n")


def main(args):
    """Convert the corpus named by ``args[1]`` into ``args[1] + ".json"``.

    *args* has the shape of ``sys.argv``: the program name followed by
    exactly one input path.  Returns the process exit status: 1 after
    printing a usage message when the argument count is wrong, 0 on success.
    Both files are opened as UTF-8 and are closed even if conversion fails.
    """
    if len(args) != 2:
        print("Usage:", args[0], "STUFF")
        return 1
    source = args[1]
    target = source + ".json"
    print("Reading from", source)
    with open(source, encoding="utf-8") as infile:
        print("Writing to", target)
        with open(target, "w", encoding="utf-8") as outfile:
            write_records(infile, outfile)
    return 0


if __name__ == "__main__":
    sys.exit(main(argv))
