1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/usr/bin/env ruby

require 'pp'
require 'inline'

# Approximate pattern scanner for nucleotide sequences.
#
# The matching is implemented in C through RubyInline. Two characters are
# considered equal when their entries in the +equal+ lookup table share at
# least one bit, so an ambiguity code matches every base it stands for.
class Scan
  # seq - String holding the sequence that #patscan searches.
  def initialize(seq)
    @seq = seq
  end

  inline do |builder|
    # NO_MATCH is the "pattern not found" sentinel. It is negative so that it
    # cannot be confused with a valid 0-based end index (0 is a legal result).
    #
    # MATCH casts both operands through unsigned char: plain char may be
    # signed, and a negative value must never index the 256-entry table.
    builder.prefix %{
      #define NO_MATCH (-1)
      #define MATCH(A,B) ((equal[(unsigned char)(A)] & equal[(unsigned char)(B)]) != 0)
    }

    # Bit mask per byte value: A=1, T/U=2, C=4, G=8. The remaining letters
    # hold the OR of the bases they represent (e.g. N=15), which corresponds
    # to the IUPAC nucleotide ambiguity codes. Upper- and lower-case letters
    # share the same masks; every other byte is 0 and so matches nothing,
    # including the terminating NUL.
    builder.prefix %{
      static const int equal[256] = {
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 1,14, 4,11, 0, 0, 8, 7, 0, 0,10, 0, 5,15, 0,
          0, 0, 9,12, 2, 2,13, 3, 0, 6, 0, 0, 0, 0, 0, 0,
          0, 1,14, 4,11, 0, 0, 8, 7, 0, 0,10, 0, 5,15, 0,
          0, 0, 9,12, 2, 2,13, 3, 0, 6, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
      };
    }

    # backtrack tries to match pattern p against the sequence starting at s.
    #
    # ss  - start of the whole sequence, used only to compute the reported
    #       end index.
    # s   - current position in the sequence.
    # p   - current position in the pattern.
    # mm  - mismatches still allowed (advance both s and p).
    # ins - insertions still allowed (skip one sequence character).
    # del - deletions still allowed (skip one pattern character).
    #
    # Yields the 0-based index of the last sequence character covered by the
    # match, or NO_MATCH. A match that covers no sequence character at
    # offset 0 (only possible when deletions swallow the entire pattern)
    # evaluates to -1 and is therefore indistinguishable from NO_MATCH.
    #
    # This is a plain C helper and lives in the prefix on purpose: builder.c
    # rewrites a function to take Ruby VALUE arguments, which would break
    # the direct call from patscan and the recursive calls below.
    builder.prefix %{
      static int backtrack(const char* ss, const char* s, const char* p, int mm, int ins, int del)
      {
          int r;

          while (*s && MATCH(*s, *p)) ++s, ++p;    // OK to always match longest segment

          if (!*p)
              return (int)(s - ss) - 1;

          /* Pattern not exhausted: spend one edit operation and retry. */
          if (mm  && *s && (r = backtrack(ss, s + 1, p + 1, mm - 1, ins, del)) != NO_MATCH) return r;
          if (ins && *s && (r = backtrack(ss, s + 1, p, mm, ins - 1, del)) != NO_MATCH) return r;
          if (del &&       (r = backtrack(ss, s, p + 1, mm, ins, del - 1)) != NO_MATCH) return r;

          return NO_MATCH;
      }
    }

    # Scan @seq from left to right for the first start position at which p
    # matches with at most mm mismatches, ins insertions and del deletions.
    #
    # Gives the 0-based index of the last sequence character of that match,
    # or -1 when no start position matches, when the sequence is empty, or
    # when the pattern is empty.
    #
    # The instance variable is first stored in a local VALUE because
    # StringValuePtr takes the address of its argument and so needs an lvalue.
    builder.c %{
      int patscan(char* p, int mm, int ins, int del)
      {
          VALUE seq = rb_iv_get(self, "@seq");
          char* ss  = StringValuePtr(seq);
          char* s;
          int   end = NO_MATCH;

          if (*p)
              for (s = ss; *s && end == NO_MATCH; ++s)
                  end = backtrack(ss, s, p, mm, ins, del);

          return end;
      }
    }
  end
end


# Demo run: print the value patscan reports for an exact search
# (no mismatches, insertions or deletions allowed).
target  = "tcatcgagtcatcgatcgatcgatcgatcga"
pattern = "gtcatcga"

puts Scan.new(target).patscan(pattern, 0, 0, 0)


__END__