import os
import re
from subprocess import Popen, PIPE

import constants
from utils import file_utils, word_utils


# A unified-diff hunk header ("@@ -a,b +c,d @@"). Anchored to the start of a
# line so that "@@ ... @@" occurring inside file content or a file-header path
# is left untouched.
_HUNK_HEADER_RE = re.compile(r'^@@.+?@@', re.MULTILINE)
_WHITESPACE_RUN_RE = re.compile(r'\s+')


def _replace_prefix(line, prefix, marker):
    """Return *line* with a leading *prefix* swapped for *marker* plus a space."""
    if line.startswith(prefix):
        line = line[len(prefix):]
    return '%s %s' % (marker, line)


def get_diff_from_raw_diff(diff_str, tokenize=False, skip_lines=2):
    """Split a unified diff into per-hunk dicts of marker-prefixed lines.

    Every hunk header (``@@ ... @@`` at the start of a line) is reduced to a
    bare ``@@``; any text that followed it on the same line is moved to a line
    of its own and is then treated as ordinary hunk content.  Each line has
    runs of whitespace collapsed to a single space, blank lines are dropped,
    and the remaining lines are rewritten according to their first character:

    * ``-``: the prefix is replaced by ``constants.SPECIAL_WORD_DELETE`` and a
      space.
    * ``+``: the prefix is replaced by ``constants.SPECIAL_WORD_ADD`` and a
      space.
    * ``@``: starts a new hunk.
    * anything else: ``constants.SPECIAL_WORD_SAME`` and a space are put in
      front of the whole line (a context line keeps its leading space when
      ``tokenize`` is false).

    Args:
        diff_str: Raw unified diff text.
        tokenize: If true, every kept line is passed through
            ``word_utils.word_tokenizer`` and re-joined with single spaces.
        skip_lines: Number of leading lines to ignore.  The default of 2
            matches the two file-header lines that ``diff -u`` emits.

    Returns:
        A list of ``{'pos': ..., 'patch': ...}`` dicts, one per hunk header,
        in order of appearance.  ``'pos'`` is the header line as processed
        (the literal ``'@@'`` unless tokenization alters it) -- the original
        line ranges are not kept.  ``'patch'`` is the hunk's rewritten lines
        joined by single spaces.  Non-skipped lines that precede the first
        header are folded into the first hunk's patch; if no header is found
        the result is an empty list.
    """
    diff_str = _HUNK_HEADER_RE.sub('@@\n', diff_str)

    diff_dicts = []
    cur_pos = None
    cur_patch = []

    for idx, diff_line in enumerate(diff_str.split('\n')):
        if idx < skip_lines:
            continue

        diff_line = _WHITESPACE_RUN_RE.sub(' ', diff_line)
        # After collapsing, a whitespace-only line is exactly one space.
        if not diff_line or diff_line == ' ':
            continue

        # Decide the line type from the raw diff prefix *before* tokenizing.
        # NOTE(review): word_tokenizer is not visible here; if it drops
        # leading whitespace, a context line such as " - item" would
        # otherwise be mistaken for a deletion (or " @x" for a hunk header).
        kind = diff_line[0]

        if tokenize:
            diff_line = ' '.join(word_utils.word_tokenizer(diff_line))

        if kind == '-':
            cur_patch.append(
                _replace_prefix(diff_line, '-', constants.SPECIAL_WORD_DELETE))
        elif kind == '+':
            cur_patch.append(
                _replace_prefix(diff_line, '+', constants.SPECIAL_WORD_ADD))
        elif kind == '@':
            # A new header closes the hunk collected so far (if any).
            if cur_pos is not None:
                diff_dicts.append({
                    'pos': cur_pos,
                    'patch': ' '.join(cur_patch),
                })
                cur_patch = []
            cur_pos = diff_line
        else:
            cur_patch.append(
                '%s %s' % (constants.SPECIAL_WORD_SAME, diff_line))

    # Flush the final hunk; nothing is emitted when no header was ever seen.
    if cur_pos is not None:
        diff_dicts.append({
            'pos': cur_pos,
            'patch': ' '.join(cur_patch),
        })

    return diff_dicts


def diff(old_str_list, new_str_list):
    """Diff two lists of strings using the external ``diff -u`` tool.

    Both lists are written, one item per line, to temporary files under
    ``constants.STATIC_DIR``; ``diff -u`` is run on them and its output is
    parsed by ``get_diff_from_raw_diff`` with that function's defaults.  The
    temporary files are removed afterwards, even if writing or running the
    tool fails.

    Args:
        old_str_list: iterable of str, the "before" lines.
        new_str_list: iterable of str, the "after" lines.

    Returns:
        list of dict, one per hunk, each with the keys ``'pos'`` and
        ``'patch'``.  An empty list is returned when the inputs are equal or
        when the tool produced no output on stdout (its stderr and exit
        status are not inspected).

    Raises:
        OSError: if the ``diff`` executable cannot be started.

    Example:
        Writing the ``constants.SPECIAL_WORD_*`` values as the placeholders
        ``<SAME>``, ``<DELETE>`` and ``<ADD>``, the call
        ``diff(['abc', 'a', 'aa', 'b', 'd'], ['abc', 'b', 'c', 'd'])`` is
        expected to return (exact hunk layout is up to the installed tool)::

            [
                {
                    'pos': '@@',
                    'patch': '<SAME>  abc <DELETE> a <DELETE> aa '
                             '<SAME>  b <ADD> c <SAME>  d'
                }
            ]
    """
    # One line per item, followed by one extra blank line -- the same file
    # content the earlier implementation produced.
    old_str = '\n'.join(old_str_list) + '\n\n'
    new_str = '\n'.join(new_str_list) + '\n\n'

    # NOTE: the file names are fixed, so concurrent calls sharing STATIC_DIR
    # would overwrite each other's files.
    tmp_old_file_path = os.path.join(constants.STATIC_DIR, 'old_file')
    tmp_new_file_path = os.path.join(constants.STATIC_DIR, 'new_file')

    try:
        file_utils.write_string_to_abs_file(tmp_old_file_path, old_str)
        file_utils.write_string_to_abs_file(tmp_new_file_path, new_str)
        # Pass an argument list instead of a shell string so that paths with
        # spaces or shell metacharacters are handled and no shell is needed.
        process = Popen(
            ['diff', '-u', tmp_old_file_path, tmp_new_file_path],
            stdout=PIPE,
            stderr=PIPE,
        )
        diff_bytes, _ = process.communicate()
    finally:
        for tmp_path in (tmp_old_file_path, tmp_new_file_path):
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    diff_str = diff_bytes.decode()
    return get_diff_from_raw_diff(diff_str)


if __name__ == '__main__':
    # Manual smoke test: parse a hand-written diff fragment and print the
    # resulting hunks.  The fragment starts directly at a hunk header (there
    # are no file-header lines), hence skip_lines=0 below.  The "@@-ss@@"
    # line also has the header shape, so the fragment contains two hunks.
    sample_raw_diff = """@@ -136,11 +136,9 @@ A heap can be classified further as either a "max heap" or a "min heap". In a ma
 than or equal to those of the children and the highest key is in the root node. In a min heap, the keys of parent nodes are less than
 or equal to those of the children and the lowest key is in the root node
 * Time Complexity:
- * Access: `O(log(n))`
- * Search: `O(log(n))`
+ * Access Max / Min: `O(1)`
  * Insert: `O(log(n))`
@@-ss@@
- * Remove: `O(log(n))`
- * Remove Max / Min: `O(1)`
+ * Remove Max / Min: `O(log(n))`
 
 <img src="/Images/heap.png?raw=true" alt="Max Heap" width="400" height="500">
"""
    print('diff as follows')
    parsed_hunks = get_diff_from_raw_diff(sample_raw_diff, skip_lines=0)
    print(parsed_hunks)
