scire
Sadh's C++ Impromptu Routines Ensemble
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
knuth_morris_pratt.hpp
Go to the documentation of this file.
1 // scire/string/knuth_morris_pratt
2 
3 // Copyright (c) 2014, Khan 'Sadh' Mostafa (http://nafSadh.com/Khan)
4 // Distributed under the Boost Software License, Version 1.0.
5 // (See accompanying text at http://www.boost.org/LICENSE_1_0.txt)
6 
7 /**
8  scire/string/knuth_morris_pratt.hpp
9 
10  scire <thing> implementation:
11  - StringMatchKMP : Knuth–Morris–Pratt (KMP) algorithm for string-matching
12 
13  other required scire files:
14  none
15 
16  author:
17  ~nafSadh
18  */
19 #ifndef SCIRE_string_knuth_morris_pratt_HPP
20 #define SCIRE_string_knuth_morris_pratt_HPP
21 
22 #include <algorithm> // std::copy
23 
24 namespace scire
25 {
26 
28 #define SCIRE_StringMatchKMP_CLASS
29 
30  /**
31  * The Knuth-Morris-Pratt (KMP) algorithm for string-matching problem.
32  *
33  * A KMP object is initialized with a pattern to find. Each character of text
34  * is then fed to the object with ReadNext() for matching.
35  *
36  * ref:
37  * -# "<a href='http://community.topcoder.com/tc?module=Static&d1=tutorials&d2=stringSearching'>
38  Introduction to String Searching Algorithms - Rabin-Karp and
39  Knuth-Morris-Pratt Algorithms</a>". TheLlama (TopCoder Member)
40 
41  -# "<a href='http://www.ics.uci.edu/~eppstein/161/kmp/'>Example code for
42  Knuth-Morris-Pratt algorithm</a>". Prof. David Eppstein, UC Irvine
43 
44  -# Knuth, Donald; Morris, James H., jr; Pratt, Vaughan (1977). "Fast
45  pattern matching in strings". SIAM Journal on Computing 6 (2): 323-350.
46  doi:10.1137/0206024. Zbl 0372.68005.
47  */
48  template<typename CharType = char, typename SzType = size_t>
50  {
51  private:
52  const CharType* pattern;/**< pattern to search for */
53  SzType lenght;/**< length of pattern */
54  SzType* F;/**< failuire functions */
55  SzType index;/**< current index of pattern matched */
56 
57  private:
58  /**
59  * return a safe copy of the passed string
60  */
61  static const CharType*
62  CopyStoreStr(
63  const CharType* str,/**< string to store a copy of */
64  SzType slen = 0/**< length of the string */
65  )
66  {
67  if (str == 0) return 0;
68  //if (slen < 0) slen = strlen(str);
69  CharType* copyStr = new CharType[slen + 1];
70  if (copyStr == 0) return 0;
72  //std::copy(str, str + slen, copyStr);
73  copyStr[slen] = '\0';
74 
75  return copyStr;
76  }
77 
78  /**
79  * compute failure functions.
80  *
81  * Make jump table for mismatches (the usual finite state automaton).
82  *
83  * The inner loop works by checking the prefixes the next character of
84  * the target can continue, longest first, chaining back by way of the
85  * previously filled retarget entries, until it either finds one that
86  * matches (the != clause) or runs out of prefixes (the > 0 clause).
87  */
88  static SzType*
89  BuildFailureFunctions(
90  const CharType* pattern,/**< string to build failure functions from */
91  SzType len/**< lenght of the string*/
92  )
93  {
94  if (pattern == nullptr) return nullptr;
95  if (len == 0) return nullptr;
96  SzType* F = new SzType[len+1];
97  if (F == nullptr) return nullptr;
98 
99 
100  F[0] = F[1] = 0; // always true
101 
102  for (SzType i = 2; i <= len; i++) {
103  // j is the index of the largest next partial match
104  // (the largest suffix/prefix) of the string under
105  // index i - 1
106  SzType j = F[i - 1];
107  for (;;) {
108  // check to see if the last character of string i -
109  // - pattern[i - 1] "expands" the current "candidate"
110  // best partial match - the prefix under index j
111  if (pattern[j] == pattern[i - 1]) {
112  F[i] = j + 1;
113  break;
114  }
115  // if we cannot "expand" even the empty string
116  if (j == 0) {
117  F[i] = 0;
118  break;
119  }
120  // else go to the next best "candidate" partial match
121  j = F[j];
122  }
123  }
124  return F;
125  }
126 
127  public:
128  /**
129  * Construct KMP pattern matching object with pattern to find
130  */
132  const CharType* pattern,/**< patther to match */
133  SzType length/**< lenght of pattern */
134  )
135  {
137  this->lenght = length;
139  this->index;
140  }
141 
142  /**
143  * read next character in text stream
144  * @return true if pattern matched
145  */
146  bool
148  CharType c /**< next char in text stream */
149  )
150  {
151  if (F == 0) return false;
152 
153  while (c != pattern[index]) {
154  if (index == 0) return false; // fell all the way back, have to give up
155  index = F[index]; // more positions to fall back to, keep trying
156  }
157  if (pattern[++index] != '\0') return false; // partial match
158  else {
159  index = F[index]; // full match, but keep going
160  return true;
161  }
162  }
163 
164  /**
165  * Find the position of the first occurrence of a substring in a text
166  *
167  * @return index (position) of text where substring (pattern) matched. if not
168  * found then the last index of text (text len)
169  */
170  static SzType Position(
171  const CharType* text,/**< text to search in (haystack) */
172  SzType textlen, /**< lenth of text */
173  const CharType* pattern,/**< pattern to find (substring, needle) */
174  SzType patternlen /**< length of pattern */
175  )
176  {
178  SzType i = 0;
179  while (i<textlen) {
180  if (kmp.ReadNext(text[i]))
181  return i+1-patternlen;
182  i++;
183  }
184  return i;
185  }
186 
187  };
188 
189 #endif//SCIRE_StringMatchKMP_CLASS
190 }
191 #endif//SCIRE_string_knuth_morris_pratt_HPP
The Knuth-Morris-Pratt (KMP) algorithm for string-matching problem.
scire/graph/gale_shapley.hpp
bool ReadNext(CharType c)
read next character in text stream
StringMatchKMP(const CharType *pattern, SzType length)
Construct KMP pattern matching object with pattern to find.
static SzType Position(const CharType *text, SzType textlen, const CharType *pattern, SzType patternlen)
Find the position of the first occurrence of a substring in a text.