class Pattern:
"""Pattern is a sequence of values that define which indices
must have the same values. For example, "AAB" means positions
0 and 1 must be the same because both are "A"s."""
def __init__(self, pattern):
"""Initialize a Pattern instance.
'watermark' - value representing a watermark,
typically a string."""
raise NotImplementedError("Pattern.__init__")
def length(self):
"""Return length of pattern."""
raise NotImplementedError("Pattern.length")
def value(self, i):
"""Return pattern value at index 'i'.
'i' - value index, integer."""
raise NotImplementedError("Pattern.value")
def find_all_mappings(self, watermark):
"""Return a list of mapping.Mapping instances that
represent all possible mappings from watermark values
to pattern values. Watermarks must be longer than
patterns and there may possibly be multiple mappings
in different parts of the watermark for the same
pattern.
'watermark' - watermark to search against, an instance
supporting watermark.Watermark API."""
raise NotImplementedError("Pattern.find_all_mappings")
class StringPattern(Pattern):
def __init__(self, pattern):
self.pattern = pattern # public
v2i = {} # value-to-indices map
for i in range(self.length()):
v = self.value(i)
try:
v2i[v].append(i)
except KeyError:
v2i[v] = [ i ]
# _same_values is a list of 2-tuples where the first
# element is the pattern value and the second is the
# list of indices where the value occurs. We keep
# lists even with only one index because it is also
# used to build the watermark-pattern value mapping.
self._same_values = [ (v, indices)
for v, indices in v2i.items() ]
def length(self):
return len(self.pattern)
def value(self, i):
return self.pattern[i]
def find_all_mappings(self, watermark):
from mapping import MappingSet
mappings = MappingSet()
# Check each possible starting index for mapping
for start in watermark.pattern_start_indices(self.length()):
m = self._find_mapping(watermark, start)
if m:
mappings.add(m)
return mappings
def _find_mapping(self, watermark, start):
# Return a mapping.Mapping instance if watermark
# (starting at 'start') matches pattern, or None
# if it does not match.
from mapping import Mapping
m = Mapping()
w_values = watermark.values(start, self.length())
for pattern_value, indices in self._same_values:
# Add mapping from watermark to pattern value.
watermark_value = w_values[indices[0]]
m[watermark_value] = pattern_value
for i in indices[1:]:
# Watermark values must match at the same
# indices as pattern values. If any does
# not, there cannot be any mapping.
if w_values[i] != watermark_value:
return None
return m
if __name__ == "__main__":
import unittest
class TestPattern(unittest.TestCase):
def setUp(self):
self.p = StringPattern("XXYY")
from watermark import DNAWatermark
self.wm1 = DNAWatermark("TTTATGATGCGGCGGAAA")
self.wm2 = DNAWatermark("ATGATGCGGCGGAAA")
self.wm3 = DNAWatermark("AAAATGATGCGGCGG")
self.wnm = DNAWatermark("TTTATGGTGCGGCGGAAA")
def test_match(self):
from mapping import Mapping, MappingSet
answer = MappingSet([ Mapping({'ATG': 'X',
'CGG': 'Y'}) ])
ms = self.p.find_all_mappings(self.wm1)
self.assertEqual(ms, answer)
ms = self.p.find_all_mappings(self.wm2)
self.assertEqual(ms, answer)
ms = self.p.find_all_mappings(self.wm3)
self.assertEqual(ms, answer)
def test_no_match(self):
ms = self.p.find_all_mappings(self.wnm)
self.assertEqual(len(ms), 0)
unittest.main()