from unittest import TestCase
from unittest.mock import Mock

from quid.match.Match import Match
from quid.match.MatchSpan import MatchSpan

from proquo.core.ProQuo import ProQuo
import numpy as np


class ProQuoTestCase(TestCase):
    """Tests for ``ProQuo.compare`` driven by mocked models and vectorizers.

    Every test feeds ``compare`` a source text, a target text containing a
    short quoted passage in double quotes, and a list of pre-computed quid
    matches. The reference and relation models are replaced by mocks that
    return fixed predictions, so only the logic inside ``ProQuo`` is exercised.
    """

    @staticmethod
    def _quid_matches(*span_pairs):
        """Build a list of quid ``Match`` objects.

        :param span_pairs: Any number of
            ``((source_start, source_end), (target_start, target_end))`` pairs.
        :return: One ``Match`` per pair, in the given order. Every span is
            created with an empty text, and each pair gets its own span objects.
        """
        return [Match(MatchSpan(source_start, source_end, ''), MatchSpan(target_start, target_end, ''))
                for (source_start, source_end), (target_start, target_end) in span_pairs]

    @staticmethod
    def _build_proquo(rel_logits=None, ref_probs_per_call=None):
        """Create a ``ProQuo`` instance wired to mocked models and vectorizers.

        :param rel_logits: Array exposed as ``.logits`` on the object returned by
            the relation model's ``predict``. Defaults to a single row
            ``[[-1.74764, 0.88853]]``.
        :param ref_probs_per_call: Optional list of arrays; the n-th call to the
            reference model's ``predict`` returns the n-th array. When omitted,
            every call returns ``[[0.88853]]``.
        :return: The configured ``ProQuo`` instance, using ``"`` as both the
            opening and the closing quotation mark.
        """
        ref_vectorizer_mock = Mock()
        ref_model_mock = Mock()
        rel_vectorizer_mock = Mock()
        rel_model_mock = Mock()

        ref_vectorizer_mock.vectorize.return_value = None
        rel_vectorizer_mock.vectorize.return_value = None

        if ref_probs_per_call is None:
            ref_model_mock.predict.return_value = np.array([[0.88853]])
        else:
            # With an iterable side_effect each call yields the next item and
            # return_value is never consulted, so it is deliberately not set.
            ref_model_mock.predict.side_effect = ref_probs_per_call

        if rel_logits is None:
            rel_logits = np.array([[-1.74764, 0.88853]])

        rel_prediction_mock = Mock()
        rel_prediction_mock.logits = rel_logits
        rel_model_mock.predict.return_value = rel_prediction_mock

        return ProQuo(ref_model_mock, ref_vectorizer_mock, rel_model_mock, rel_vectorizer_mock, '"', '"')

    def _assert_single_source_span(self, matches, start, end):
        """Assert that exactly one match was returned and check its source span.

        :param matches: The result of ``ProQuo.compare``.
        :param start: Expected ``source_span.start`` of the only match.
        :param end: Expected ``source_span.end`` of the only match.
        """
        self.assertEqual(1, len(matches))
        self.assertEqual(start, matches[0].source_span.start)
        self.assertEqual(end, matches[0].source_span.end)

    def test_one_word_exact_match(self):
        """A quoted single word that occurs verbatim in the source is linked to it."""
        source_text = 'This is some long quote. This is some text for testing'
        target_text = 'This is some long quote (S.1). And this is a "text" with a quote (S.1)'

        quid_matches = self._quid_matches(((0, 23), (0, 23)))

        proquo = self._build_proquo()
        matches = proquo.compare(source_text, target_text, quid_matches)

        self._assert_single_source_span(matches, 38, 42)

    def test_one_word_fuzzy_match(self):
        """A quoted "test-word" is linked to "testword" in the source."""
        source_text = 'This is some long quote. This is some testword for testing'
        target_text = 'This is some long quote (S.1). And this is a "test-word" with a quote (S.1)'

        quid_matches = self._quid_matches(((0, 23), (0, 23)))

        proquo = self._build_proquo()
        matches = proquo.compare(source_text, target_text, quid_matches)

        self._assert_single_source_span(matches, 38, 46)

    def test_one_word_multiple_fuzzy_matches(self):
        """With "testword" occurring twice in the source, the first occurrence (30-38) is expected."""
        source_text = 'This is some long quote. This testword is some testword for testing'
        target_text = 'This is some long quote (S.1). And this is a "test-word" with a quote (S.1)'

        quid_matches = self._quid_matches(((0, 23), (0, 23)))

        proquo = self._build_proquo()
        matches = proquo.compare(source_text, target_text, quid_matches)

        self._assert_single_source_span(matches, 30, 38)

    def test_multi_word_exact_match(self):
        """A quoted two-word passage already present as a quid match is returned with its source span."""
        source_text = 'This is some long quote. This is some double word for testing'
        target_text = 'This is some long quote (S.1). And this is a "double word" with a quote (S.1)'

        quid_matches = self._quid_matches(((0, 23), (0, 23)),
                                          ((38, 49), (46, 57)))

        proquo = self._build_proquo()
        matches = proquo.compare(source_text, target_text, quid_matches)

        self._assert_single_source_span(matches, 38, 49)

    def test_multi_word_fuzzy_match(self):
        """A quoted "double words" paired by quid with "double word" in the source is returned."""
        source_text = 'This is some long quote. This is some double word for testing'
        target_text = 'This is some long quote (S.1). And this is a "double words" with a quote (S.1)'

        quid_matches = self._quid_matches(((0, 23), (0, 23)),
                                          ((38, 49), (46, 58)))

        proquo = self._build_proquo()
        matches = proquo.compare(source_text, target_text, quid_matches)

        self._assert_single_source_span(matches, 38, 49)

    def test_multi_word_multiple_fuzzy_matches(self):
        """Two quid candidates (38-49 and 116-127) share one target span; the one at 38-49 is expected."""
        source_text = 'This is some long quote. This is some double word for testing. There was a dog in the house.' \
                      ' And later we have more double word.'
        target_text = 'This is some long quote (S.1). And this is a "double words" with a quote (S.1).' \
                      ' There was a dog in the house (S.2).'

        quid_matches = self._quid_matches(((0, 23), (0, 23)),
                                          ((63, 91), (80, 108)),
                                          ((38, 49), (46, 58)),
                                          ((116, 127), (46, 58)))

        proquo = self._build_proquo()
        matches = proquo.compare(source_text, target_text, quid_matches)

        self._assert_single_source_span(matches, 38, 49)

    def test_multi_word_multiple_fuzzy_matches_too_close(self):
        """Two quid candidates (30-41 and 50-61) for the same target span yield no match at all."""
        source_text = 'This is some long quote. This double word is some double word for testing'
        target_text = 'This is some long quote (S.1). And this is a "double words" with a quote (S.1).'

        quid_matches = self._quid_matches(((0, 23), (0, 23)),
                                          ((30, 41), (46, 58)),
                                          ((50, 61), (46, 58)))

        proquo = self._build_proquo()
        matches = proquo.compare(source_text, target_text, quid_matches)

        self.assertEqual(0, len(matches))

    def test_parallel_print_basic_success(self):
        """With ``parallel_print=True`` and a page range of 1 to 6, the quoted word is linked."""
        source_text = 'This is some long quote. This is some text for testing. And here we have another long quote'
        target_text = 'This is some long quote (S.1). And this is a "text" with a quote (S.3).' \
                      ' And here we have another long quote (S.5).'

        quid_matches = self._quid_matches(((0, 23), (0, 23)),
                                          ((56, 91), (72, 107)))

        proquo = self._build_proquo()
        matches = proquo.compare(source_text, target_text, quid_matches, parallel_print=True,
                                 parallel_print_first_page=1, parallel_print_last_page=6)

        self._assert_single_source_span(matches, 38, 42)

    def test_parallel_false_no_match(self):
        """The same input as the parallel-print success case yields no match with ``parallel_print=False``."""
        source_text = 'This is some long quote. This is some text for testing. And here we have another long quote'
        target_text = 'This is some long quote (S.1). And this is a "text" with a quote (S.3).' \
                      ' And here we have another long quote (S.5).'

        quid_matches = self._quid_matches(((0, 23), (0, 23)),
                                          ((56, 91), (72, 107)))

        proquo = self._build_proquo()
        matches = proquo.compare(source_text, target_text, quid_matches, parallel_print=False)

        self.assertEqual(0, len(matches))

    def test_example_clustering(self):
        """Uses per-call reference probabilities and four relation logit rows; expects the quoted word linked."""
        source_text = 'This is some long quote. This is some text for testing. And here we have another long quote.' \
                      ' We need some text in between. There was a dog in the house.'
        target_text = 'This is some long quote (S.1). And this is a "text" with a quote (S.2).' \
                      ' And here we have another long quote (S.3). There was a dog in the house (S.4).' \
                      ' Plus some text with (1897).'

        quid_matches = self._quid_matches(((0, 23), (0, 23)),
                                          ((56, 91), (72, 107)),
                                          ((123, 151), (116, 144)))

        # The first predict() call gets three probabilities, the second gets five.
        ref_probs = [np.array([[0.9], [0.9], [0.9]]), np.array([[0.9], [0.9], [0.9], [0.9], [0.1]])]
        rel_logits = np.array([[0.88853, -1.74764], [-1.74764, 0.88853], [0.88853, -1.74764], [0.88853, -1.74764]])

        proquo = self._build_proquo(rel_logits=rel_logits, ref_probs_per_call=ref_probs)
        matches = proquo.compare(source_text, target_text, quid_matches)

        self._assert_single_source_span(matches, 38, 42)

    def test_quote_without_ref(self):
        """The quoted word has no page reference directly after it in the target; it is still linked."""
        source_text = 'This is some long quote. This is some text for testing. And here we have another long quote.' \
                      ' We need some more more more more text in between. There was a dog in the house.'
        target_text = 'This is some long quote (S.1). And this is a "text" with a quote.' \
                      ' And here we have another long quote (S.3). There was a dog in the house (S.4).' \
                      ' Plus some text with (1897).'

        quid_matches = self._quid_matches(((0, 23), (0, 23)),
                                          ((56, 91), (66, 101)),
                                          ((143, 171), (103, 131)))

        # The first predict() call gets three probabilities, the second gets five.
        ref_probs = [np.array([[0.9], [0.9], [0.9]]), np.array([[0.9], [0.9], [0.9], [0.9], [0.1]])]
        rel_logits = np.array([[0.88853, -1.74764], [0.88853, -1.74764], [0.88853, -1.74764], [0.88853, -1.74764]])

        proquo = self._build_proquo(rel_logits=rel_logits, ref_probs_per_call=ref_probs)
        matches = proquo.compare(source_text, target_text, quid_matches)

        self._assert_single_source_span(matches, 38, 42)
