Spaces:
Runtime error
Runtime error
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
import unittest | |
import mock | |
import os,sys,inspect | |
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) | |
parentdir = os.path.dirname(currentdir) | |
sys.path.insert(0,parentdir) | |
from apply_bpe import isolate_glossary, BPE | |
class TestIsolateGlossaryFunction(unittest.TestCase): | |
def setUp(self): | |
self.glossary = 'like' | |
def _run_test_case(self, test_case): | |
orig, expected = test_case | |
out = isolate_glossary(orig, self.glossary) | |
self.assertEqual(out, expected) | |
def test_empty_string(self): | |
orig = '' | |
exp = [''] | |
test_case = (orig, exp) | |
self._run_test_case(test_case) | |
def test_no_glossary(self): | |
orig = 'word' | |
exp = ['word'] | |
test_case = (orig, exp) | |
self._run_test_case(test_case) | |
def test_isolated_glossary(self): | |
orig = 'like' | |
exp = ['like'] | |
test_case = (orig, exp) | |
self._run_test_case(test_case) | |
def test_word_one_side(self): | |
orig = 'likeword' | |
exp = ['like', 'word'] | |
test_case = (orig, exp) | |
self._run_test_case(test_case) | |
def test_words_both_sides(self): | |
orig = 'wordlikeword' | |
exp = ['word', 'like', 'word'] | |
test_case = (orig, exp) | |
self._run_test_case(test_case) | |
def test_back_to_back_glossary(self): | |
orig = 'likelike' | |
exp = ['like', 'like'] | |
test_case = (orig, exp) | |
self._run_test_case(test_case) | |
def test_multiple_glossaries(self): | |
orig = 'wordlikewordlike' | |
exp = ['word', 'like', 'word', 'like'] | |
test_case = (orig, exp) | |
self._run_test_case(test_case) | |
class TestBPEIsolateGlossariesMethod(unittest.TestCase): | |
def setUp(self): | |
amock = mock.MagicMock() | |
amock.readline.return_value = 'something' | |
glossaries = ['like', 'Manuel', 'USA'] | |
self.bpe = BPE(amock, glossaries=glossaries) | |
def _run_test_case(self, test_case): | |
orig, expected = test_case | |
out = self.bpe._isolate_glossaries(orig) | |
self.assertEqual(out, expected) | |
def test_multiple_glossaries(self): | |
orig = 'wordlikeUSAwordManuelManuelwordUSA' | |
exp = ['word', 'like', 'USA', 'word', 'Manuel', 'Manuel', 'word', 'USA'] | |
test_case = (orig, exp) | |
self._run_test_case(test_case) | |
class TestRegexIsolateGlossaries(unittest.TestCase): | |
def setUp(self): | |
amock = mock.MagicMock() | |
amock.readline.return_value = 'something' | |
glossaries = ["<country>\w*</country>", "<name>\w*</name>", "\d+"] | |
self.bpe = BPE(amock, glossaries=glossaries) | |
def _run_test_case(self, test_case): | |
orig, expected = test_case | |
out = self.bpe._isolate_glossaries(orig) | |
self.assertEqual(out, expected) | |
def test_regex_glossaries(self): | |
orig = 'wordlike<country>USA</country>word10001word<name>Manuel</name>word<country>USA</country>' | |
exp = ['wordlike', '<country>USA</country>', 'word', '10001', 'word', '<name>Manuel</name>', 'word', '<country>USA</country>'] | |
test_case = (orig, exp) | |
self._run_test_case(test_case) | |
def encode_mock(segment, x2, x3, x4, x5, x6, x7, glosses, dropout): | |
if glosses.match(segment): | |
return (segment,) | |
else: | |
l = len(segment) | |
return (segment[:l//2], segment[l//2:]) | |
class TestBPESegmentMethod(unittest.TestCase): | |
def setUp(self): | |
amock = mock.MagicMock() | |
amock.readline.return_value = 'something' | |
glossaries = ['like', 'Manuel', 'USA'] | |
self.bpe = BPE(amock, glossaries=glossaries) | |
def _run_test_case(self, test_case, encode_function): | |
orig, expected = test_case | |
out = self.bpe.segment(orig) | |
self.assertEqual(out, expected) | |
def test_multiple_glossaries(self): | |
orig = 'wordlikeword likeManuelword' | |
exp = 'wo@@ rd@@ like@@ wo@@ rd like@@ Manuel@@ wo@@ rd' | |
test_case = (orig, exp) | |
self._run_test_case(test_case) | |
if __name__ == '__main__': | |
unittest.main() | |