import pytest
from .. import encodings
from itertools import combinations
@pytest.fixture(scope="module")
def sequence():
    """Provide the nucleotide byte string shared by the tests in this module.

    The literal ends in four ``N`` bases; the remaining bases are A/C/G/T.
    """
    nucleotides = b"ACGTTTGAGATGAGATATAGANNNN"
    return nucleotides
@pytest.fixture(scope="module")
def encoder_2bit(sequence):
    """Provide an ``encodings.TwoBit`` built with the length of ``sequence``."""
    return encodings.TwoBit(len(sequence))
@pytest.fixture(scope="module")
def encoder_3bit():
    """Provide an ``encodings.ThreeBit`` constructed without arguments."""
    three_bit = encodings.ThreeBit()
    return three_bit
@pytest.fixture(
    scope="module",
    params=[encodings.TwoBit, encodings.ThreeBit],
)
def encoder(request):
    """Provide each encoder class in turn (the class itself, not an instance).

    Tests using this fixture run once per entry in ``params`` and are
    responsible for instantiating the class they receive.
    """
    encoder_class = request.param
    return encoder_class
def test_two_bit_encode_decode_produces_same_string_except_for_N(
    sequence, encoder_2bit
):
    """Round-trip ``sequence`` through the 2-bit encoder and compare the result.

    The fixture sequence ends in four ``N`` bases, which the 2-bit encoder
    randomizes, so those trailing positions are excluded from the comparison.
    Every base before them must survive the round trip unchanged.
    """
    encoded = encoder_2bit.encode(sequence)
    decoded = encoder_2bit.decode(encoded)
    # Compare everything except the trailing four Ns. The previous slice,
    # [:4], only checked the first four bases and left the rest untested.
    assert sequence[:-4] == decoded[:-4]  # last 4 are N, which get randomized
def test_three_bit_encode_decode_produces_same_string(sequence, encoder_3bit):
    """Check that a 3-bit encode/decode round trip returns ``sequence`` exactly."""
    round_tripped = encoder_3bit.decode(encoder_3bit.encode(sequence))
    assert sequence == round_tripped
def test_two_bit_encoder_gets_correct_gc_content(encoder_2bit):
    """Check the 2-bit ``gc_content`` against a direct count of C and G bases.

    Uses a short sequence without ``N`` so the expected count is deterministic.
    """
    sequence_no_n = b"AGCGCGAT"
    expected_gc = sum(sequence_no_n.count(base) for base in (b"C", b"G"))
    observed_gc = encoder_2bit.gc_content(encoder_2bit.encode(sequence_no_n))
    assert observed_gc == expected_gc
def test_three_bit_encoder_gets_correct_gc_content(sequence, encoder_3bit):
    """Check the 3-bit ``gc_content`` against a direct count of C and G bases."""
    expected_gc = sequence.count(b"C") + sequence.count(b"G")
    encoded = encoder_3bit.encode(sequence)
    assert encoder_3bit.gc_content(encoded) == expected_gc
def test_two_bit_throws_errors_when_asked_to_encode_unknown_nucleotide(encoder_2bit):
    """Check that 2-bit encoding raises ``KeyError`` for an unrecognized base."""
    invalid_sequence = b"ACGTP"  # P is not a valid code
    with pytest.raises(KeyError):
        encoder_2bit.encode(invalid_sequence)
def test_three_bit_encodes_unknown_nucleotides_as_N(encoder_3bit):
    """Check that an unrecognized base comes back as ``N`` after a 3-bit round trip."""
    invalid_sequence = b"ACGTP"  # P is not a valid code
    round_tripped = encoder_3bit.decode(encoder_3bit.encode(invalid_sequence))
    assert round_tripped == b"ACGTN"
@pytest.fixture
def simple_barcodes():
    """Provide a small barcode set whose pairwise Hamming distances are 1 or 2.

    All barcodes are single-base variants of ``ACGT``: the first group varies
    the last base, the second group varies the first base.
    """
    last_base_variants = [b"ACGT", b"ACGG", b"ACGA", b"ACGC"]
    first_base_variants = [b"TCGT", b"CCGT", b"GCGT"]
    return last_base_variants + first_base_variants
@pytest.fixture
def simple_hamming_distances(simple_barcodes):
    """Provide reference Hamming distances for every pair of ``simple_barcodes``.

    Distances are computed by direct position-wise comparison of the raw
    barcodes and are listed in ``itertools.combinations`` order.
    """
    return [
        sum(left != right for left, right in zip(first, second))
        for first, second in combinations(simple_barcodes, 2)
    ]
def test_encoded_hamming_distance_is_accurate(
    simple_hamming_distances, simple_barcodes, encoder
):
    """Check the encoder's ``hamming_distance`` against the reference distances.

    The encoder class is instantiated with length 4, each barcode is encoded,
    and the distances between all encoded pairs must equal the reference list
    computed from the raw barcodes.
    """
    codec = encoder(4)
    encoded_barcodes = [codec.encode(barcode) for barcode in simple_barcodes]

    # Pair order matches the reference fixture: both use combinations(..., 2).
    observed_distances = [
        codec.hamming_distance(first, second)
        for first, second in combinations(encoded_barcodes, 2)
    ]

    assert simple_hamming_distances == observed_distances