"""Test gen_cmat. king In [21]: len(de) Out[21]: 51 In [22]: len(en) Out[22]: 51 In [23]: len(" ".join(en)) Out[23]: 11208 In [24]: len(" ".join(de)) Out[24]: 13532 In [25]: %time en_vec = model_s.encode(en) CPU times: user 22 s, sys: 436 ms, total: 22.4 s Wall time: 22.4 s In [26]: %time de_vec = model_s.encode(de) CPU times: user 22.8 s, sys: 311 ms, total: 23.1 s Wall time: 23.1 s en1 = loadparas("data/sternstunden04-en.txt") en2 = loadparas("data/sternstunden04-de.txt") len(en1) # 30 len(" ".join(en1)) # 29718 len(" ".join(en2)) # 31478 """ from cmat2aset310 import cmat2aset from aset2pairs import aset2pairs from st_mlbee.gen_cmat import gen_cmat from st_mlbee.loadtext import loadparas paras1 = loadparas("data/sternstunden04-en.txt") paras2 = loadparas("data/sternstunden04-de.txt") cmat = gen_cmat(paras1, paras2) def test_gen_cmat_sternstunden04(): """Test gen_cmat sternstunden04.""" len1, len2 = len(paras1), len(paras2) # note the order assert cmat.shape == (len2, len1) def test_aset2pairs(): """Test aset2pairs.""" aset = cmat2aset(cmat) pairs = aset2pairs(paras1, paras2, aset) assert "Marseillaise" in pairs[2][0] assert "Marseillaise" in pairs[2][1] assert pairs[2][2] > 0.95