import json
import os
import pdb
import pickle


def add_lang_by_task(target_str, task, sub_task):
    """Return ``target_str`` unchanged.

    Placeholder hook: no task (including ``'bigfixes_task'``) currently
    modifies the target, so ``task`` and ``sub_task`` have no effect on
    the result.
    """
    return target_str

def convert_examples_to_features(item):
    """Tokenize one example into an ``InputFeatures`` record.

    ``item`` is a 5-tuple ``(example, example_index, tokenizer, args, stage)``.
    The encoder input is the (optionally task-prefixed) source followed by
    the ``<S2SV_Retrived>`` marker and the retrieved text. When ``stage`` is
    ``'test'`` no target is encoded and ``target_ids`` is an empty list.

    Raises:
        NameError: for the 'defect'/'clone' tasks when the target is
            neither 0 nor 1.
        AssertionError: if an encoded sequence does not contain exactly
            one EOS token id.
    """
    example, example_index, tokenizer, args, stage = item

    # Task prefix is only used for T5-style models, and only when requested.
    wants_prefix = args.model_type in ['t5', 'codet5'] and args.add_task_prefix
    if not wants_prefix:
        source_str = example.source
    elif args.sub_task != 'none':
        source_str = f"{args.task} {args.sub_task}: {example.source}"
    else:
        source_str = f"{args.task}: {example.source}"

    # Neutralise literal '</s>' in the text so the EOS-count check below
    # only sees the EOS appended by the tokenizer.
    encoder_text = f"{source_str} <S2SV_Retrived> {example.retrived}".replace('</s>', '<unk>')
    concat_ids = tokenizer.encode(
        encoder_text,
        max_length=args.max_source_length,
        padding='max_length',
        truncation=True,
    )
    assert concat_ids.count(tokenizer.eos_token_id) == 1

    target_ids = []
    if stage != 'test':
        label = example.target
        if args.add_lang_ids:
            label = add_lang_by_task(example.target, args.task, args.sub_task)

        # Classification-style tasks carry 0/1 labels; spell them out as text.
        if args.task in ['defect', 'clone']:
            if label == 0:
                label = 'false'
            elif label == 1:
                label = 'true'
            else:
                raise NameError

        decoder_text = label.replace('</s>', '<unk>')
        target_ids = tokenizer.encode(
            decoder_text,
            max_length=args.max_target_length,
            padding='max_length',
            truncation=True,
        )
        assert target_ids.count(tokenizer.eos_token_id) == 1

    return InputFeatures(example_index, concat_ids, target_ids, url=example.url)

class InputFeatures(object):
    """Container for the encoded form of a single example.

    Attributes are stored exactly as passed in:
    ``example_id``, ``source_ids``, ``target_ids`` and the optional ``url``.
    """

    def __init__(self, example_id, source_ids, target_ids, url=None):
        # Identification / provenance.
        self.example_id = example_id
        self.url = url
        # Encoded sequences.
        self.source_ids = source_ids
        self.target_ids = target_ids


class Example(object):
    """A single raw (untokenized) example.

    Holds the example index, the source and target values, the retrieved
    text (``retrived``, empty by default), plus optional ``url``, ``task``
    and ``sub_task`` metadata. All values are stored as given.
    """

    def __init__(self, idx, source, target, retrived='', url=None, task='', sub_task=''):
        # Core content.
        self.idx = idx
        self.source = source
        self.target = target
        self.retrived = retrived
        # Optional metadata.
        self.url = url
        self.task = task
        self.sub_task = sub_task


def read_bigfixes_examples(filename, data_num):
    """Load examples from a JSON Lines file.

    Every non-blank line is parsed as a JSON object. Its ``"source"``,
    ``"target"`` and ``"retrived"`` fields (each defaulting to ``""`` when
    absent) are wrapped in an ``Example``.

    Args:
        filename: Path of the JSON Lines file to read (decoded as UTF-8).
        data_num: Maximum number of examples to return. A non-positive
            value (e.g. ``-1``) never matches the running count, so the
            whole file is read.

    Returns:
        A list of ``Example`` objects whose ``idx`` is their 0-based
        position in the returned list.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    examples = []

    # JSON text is UTF-8; do not depend on the platform default encoding.
    with open(filename, 'r', encoding='utf-8') as json_file:
        for line in json_file:
            line = line.strip()
            if not line:
                # Tolerate blank / trailing empty lines instead of crashing
                # inside json.loads.
                continue

            data = json.loads(line)
            examples.append(
                Example(
                    idx=len(examples),
                    source=data.get("source", ""),
                    target=data.get("target", ""),
                    retrived=data.get("retrived", ""),
                )
            )

            # Compare the number of examples collected so far (not the
            # 0-based line index) so exactly data_num examples are returned.
            if len(examples) == data_num:
                break

    return examples
