diff --git a/Writerside/images_data/17-5-1.png b/Writerside/images_data/17-5-1.png new file mode 100644 index 0000000..132239b Binary files /dev/null and b/Writerside/images_data/17-5-1.png differ diff --git a/Writerside/topics/Data-Structures-and-Algorithms-2.md b/Writerside/topics/Data-Structures-and-Algorithms-2.md index af4b0ac..69a24f8 100644 --- a/Writerside/topics/Data-Structures-and-Algorithms-2.md +++ b/Writerside/topics/Data-Structures-and-Algorithms-2.md @@ -6116,519 +6116,94 @@ sum of edge weights is negative.

negative cycles.

-## 18 Maximum Flow and Minimum Cut + + +

Initialize distTo[s] = 0 and distTo[v] = ∞ for all + other vertices.

+
+ +

Repeat V times, relax each edge.

+
+
-## 19 Substring Search +

Practical Improvement: If +distTo[v] does not change during pass i, no need to +relax any edge pointing from v in pass i+1 => +maintain queue of vertices +whose distTo[] changed.

-### 19.1 Introduction + + + + + + + + + +
AlgorithmRestrictionTypical CaseWorst CaseExtra Space
Topological SortNo Directed CyclesE + VE + VV +

Dijkstra

(Binary Heap)

+
No Negative WeightsE \log V +E \log VV
Bellman-FordNo Negative CyclesEV +EVV

Bellman-Ford

(queue-based)

+
E + VEVV
- + +
  • -

    Goal: Find pattern of length -M in text of length N (typically -N >> M).

    +

    Directed cycles make the problem harder.

  • -

    Applications: Find & replace, -computer forensics, identify patterns indicative of spam, -electronic surveillance, screen scraping, etc.

    +

    Negative weights make the problem harder.

    +
  • +
  • +

    Negative cycles make the problem intractable.

  • +
    -### 19.2 Brute-Force Substring Search - -* Theoretical challenge: Linear-time guarantee. - (Worst case: \sim MN) -* Practical challenge: Avoid backup in text stream. (Brute-force - algorithm needs backup for every mismatch) - -Java - -```Java -public static int search (String pat, String txt) { - int M = pat.length(); - int N = txt.length(); - int i, j; - for (i = 0; i <= N - M; i++) { - for (j = 0; j < M; j++) { - if (txt.charAt(i + j) != pat.charAt(j)) { - break; - } - } - if (j == M) { - return i; - } - } - return N; -} -``` - -Java (Alternate Implementation) - -```Java -/** - * Same sequence of char compares as previous implementation. - *

    - * {@code i} points to end of sequence of already-matched chars - * in text. - *

    - * {@code j} stores number of already-matchedchars (end of - * sequence in pattern). - */ -public static int search(String pat, String txt) { - int i, M = pat.length(); - int j, N = txt.length(); - for (i = 0, j = 0; i < N && j < M; i++) { - if (txt.charAt(i) == pat.charAt(j)) { - j++; - } - else { - i -= j; - j = 0; - } - } - if (j == M) { - return i - M; - } - else { - return N; - } -} -``` - -C++ - -```C++ -int bruteForceSubstringSearch(const std::string& text, const std::string& pattern) { - int n = text.length(); - int m = pattern.length(); - - for (int i = 0; i <= n - m; i++) { - int j; - for (j = 0; j < m; j++) { - if (text[i + j] != pattern[j]) { - break; - } - } - if (j == m) { - return i; - } - } - return -1; -} -``` - -Python - -```Python -def brute_force_search(main_string, sub_string): - len_main = len(main_string) - len_sub = len(sub_string) - - for i in range(len_main - len_sub + 1): - j = 0 - - while(j < len_sub): - if (main_string[i + j] != sub_string[j]): - break - j += 1 - - if (j == len_sub): - return i - - return -1 -``` - -### 19.3 Knuth-Morris-Pratt - -#### 19.3.1 Proposition - -* KMP substring search accesses no more than M + N - chars to search for a pattern of length M in a text of - length N. - -> Proof: Each pattern char accessed once when constructing DFA; -> each text char accessed once (in the worst case) when simulating -> DFA. -> -{style = "tip"} - -* KMP constructs `dfa[][]` in time and space proportional to RM, - where R is the alphabet size and M is the pattern - length. - -> Improved version of KMP constructs `nfa[]` in time and space -> proportional to M. -> -{style = "tip"} - -#### 19.3.2 DFA - -Deterministic Finite State Automaton (DFA) is an abstract -string-search machine. - -* Finite number of states (including start and halt). -* Exactly one transition for each char in alphabet. -* Accept if sequence of transitions lead to halt state. 
- -Alt text - -DFA state = number of characters in pattern that have been matched (length -of longest prefix of `pat[]` that is a suffix of `txt[0...i]`). - -To compute DFA: If in state j and next char `c != pat.charAt(j)`, -then the last j - 1 characters of input are `pat[1...j - 1]`, -followed by `c`. Simulate `pat[1...j - 1]` on DFA and take transition c. - -For each state j and char `c != pat.charAt(j)`, set `dfa[c][j] = dfa[c][X]`, -then update `X = dfa[pat.charAt(j)][X]`. X is the simulation of `pat[1...j - 1]` on DFA. - -> This is the implementation using DFA. -> -{style = "note"} - -Java (Princeton) - -```Java -public class KMP { - private final int R; // the radix - private final int m; // length of pattern - private final int[][] dfa; // the KMP automaton - - /** - * Preprocesses the pattern string. - * - * @param pat the pattern string - */ - public KMP(String pat) { - this.R = 256; - this.m = pat.length(); - - // build DFA from pattern - dfa = new int[R][m]; - dfa[pat.charAt(0)][0] = 1; - for (int x = 0, j = 1; j < m; j++) { - for (int c = 0; c < R; c++) - dfa[c][j] = dfa[c][x]; // Copy mismatch cases. - dfa[pat.charAt(j)][j] = j+1; // Set match case. - x = dfa[pat.charAt(j)][x]; // Update restart state. - } - } - - /** - * Preprocesses the pattern string. - * - * @param pattern the pattern string - * @param R the alphabet size - */ - public KMP(char[] pattern, int R) { - this.R = R; - this.m = pattern.length; - - // build DFA from pattern - int m = pattern.length; - dfa = new int[R][m]; - dfa[pattern[0]][0] = 1; - for (int x = 0, j = 1; j < m; j++) { - for (int c = 0; c < R; c++) - dfa[c][j] = dfa[c][x]; // Copy mismatch cases. - dfa[pattern[j]][j] = j+1; // Set match case. - x = dfa[pattern[j]][x]; // Update restart state. - } - } - - /** - * Returns the index of the first occurrence of the pattern string - * in the text string. 
- * - * @param txt the text string - * @return the index of the first occurrence of the pattern string - * in the text string; N if no such match - */ - public int search(String txt) { - - // simulate operation of DFA on text - int n = txt.length(); - int i, j; - for (i = 0, j = 0; i < n && j < m; i++) { - j = dfa[txt.charAt(i)][j]; - } - if (j == m) return i - m; // found - return n; // not found - } - - /** - * Returns the index of the first occurrence of the pattern string - * in the text string. - * - * @param text the text string - * @return the index of the first occurrence of the pattern string - * in the text string; N if no such match - */ - public int search(char[] text) { - - // simulate operation of DFA on text - int n = text.length; - int i, j; - for (i = 0, j = 0; i < n && j < m; i++) { - j = dfa[text[i]][j]; - } - if (j == m) return i - m; // found - return n; // not found - } -} -``` - -C++ - -```C++ -#include -#include - -class KMP { -private: - int R; // the radix - int m; // length of pattern - std::vector> dfa; // the KMP automaton - -public: - // Preprocesses the pattern string. - KMP(std::string pat) { - this->R = 256; - this->m = pat.length(); - - // build DFA from pattern - dfa = std::vector>(R, std::vector(m)); - dfa[pat[0]][0] = 1; - for (int x = 0, j = 1; j < m; j++) { - for (int c = 0; c < R; c++) - dfa[c][j] = dfa[c][x]; // Copy mismatch cases. - dfa[pat[j]][j] = j+1; // Set match case. - x = dfa[pat[j]][x]; // Update restart state. - } - } - - // Returns the index of the first occurrence of the pattern string - // in the text string. - int search(std::string txt) { - - // simulate operation of DFA on text - int n = txt.length(); - int i, j; - for (i = 0, j = 0; i < n && j < m; i++) { - j = dfa[txt[i]][j]; - } - if (j == m) return i - m; // found - return n; // not found - } -}; -``` - -Python +

    Find A Negative Cycle:

    -```Python -class KMP: - def __init__(self, pat): - self.R = 256 # the radix - self.m = len(pat) # length of pattern - - # build DFA from pattern - self.dfa = [[0 for _ in range(self.m)] for _ in range(self.R)] - self.dfa[ord(pat[0])][0] = 1 - x = 0 - for j in range(1, self.m): - for c in range(self.R): - self.dfa[c][j] = self.dfa[c][x] # Copy mismatch cases. - self.dfa[ord(pat[j])][j] = j + 1 # Set match case. - x = self.dfa[ord(pat[j])][x] # Update restart state. - - def search(self, txt): - # simulate operation of DFA on text - n = len(txt) - i, j = 0, 0 - while i < n and j < self.m: - j = self.dfa[ord(txt[i])][j] - i += 1 - if j == self.m: - return i - self.m # found - return n # not found -``` +

    If there is a negative cycle, Bellman-Ford gets stuck in a loop, +updating distTo[] and edgeTo[] entries of vertices in the cycle.

    -#### 19.3.3 NFA +

    If any vertex v is updated in phase V, there exists a negative +cycle (and can trace back edgeTo[v] entries to find it).

    -Example: A B A B A C - -lps: 0 0 1 2 3 0 +

    Application - Arbitrage Detection +

    -

    Explanantion for k = lps[k - 1] -in computePrefix:

    +

    Currency exchange graph.

    - -
  • -

    When k reaches 3, q = 5, the position now is C. -The current prefix (also the suffix, without considering C -) is "ABA".

    -

    ABA BA

    -

    AB ABA

    -
  • -
  • -

    Since C is a mismatch for pattern[3] = B -, we need to first find the longest prefix in "ABA" that is -also a suffix.

    -

    ABAB - AC

    -

    AB ABAC -

    -
  • +
  • -

    The longest prefix and suffix in ABA is A, - which is given by lps[q - 1] = lps[2] = 1.

    +

    Vertex = currency.

  • -

    At this time, we need to try again if C is a match for -the character behind the pattern[1] = B, -which is not.

    -

    AB - ABAC

    -

    ABAB AC -

    +

    Edge = transaction, with weight equal to exchange rate.

  • -

    The longest prefix and suffix in "A" is "", k = 0, -lps[5] = 0.

    +

    Find a directed cycle whose product of edge weights is > 1.

  • -> This is the implementation using NFA. -> -{style = "tip"} - -Java - -```Java -public class KMP { - private int[] computeTemporaryArray(char pattern[]) { - int[] lps = new int[pattern.length]; - int index = 0; - for (int i = 1; i < pattern.length;) { - if (pattern[i] == pattern[index]) { - lps[i] = index + 1; - index++; - i++; - } else { - if (index != 0) { - index = lps[index - 1]; - } else { - lps[i] = 0; - i++; - } - } - } - return lps; - } - - public boolean KMP(char text[], char pattern[]) { - int lps[] = computeTemporaryArray(pattern); - int i = 0; - int j = 0; - while (i < text.length && j < pattern.length) { - if (text[i] == pattern[j]) { - i++; - j++; - } else { - if (j != 0) { - j = lps[j - 1]; - } else { - i++; - } - } - } - if (j == pattern.length) { - return true; - } - return false; - } -} -``` - -C++ - -```C++ -#include -#include - -std::vector computePrefixFunction(const std::string& pattern) { - int m = pattern.length(); - std::vector lps(m); - lps[0] = 0; - - int k = 0; // Length of the longest prefix & suffix - for (int q = 1; q < m; q++) { // q is the position - while (k > 0 && pattern[k] != pattern[q]) - k = lps[k-1]; - - if (pattern[k] == pattern[q]) - k++; - - lps[q] = k; - } - - return lps; -} - -std::vector KMP(const std::string& text, const std::string& pattern) { - int n = text.length(); - int m = pattern.length(); - std::vector longestPrefix = computePrefixFunction(pattern); - std::vector occurrences; - - int q = 0; - for (int i = 0; i < n; i++) { - while (q > 0 && pattern[q] != text[i]) - q = longestPrefix[q-1]; - - if (pattern[q] == text[i]) - q++; +Arbitrage Detection - if (q == m) { - occurrences.push_back(i - m + 1); - q = longestPrefix[q-1]; - } - } - - return occurrences; -} -``` + + +

    Let weight of edge v→w be - ln + (exchange rate from currency v to w).

    +
    + +

    Multiplication turns to addition; \gt 1 turns to + \lt 0.

    +
    + +

    Find a directed cycle whose sum of edge weights is \lt 0 + (negative cycle).

    +
    +
    -Python +## 18 Maximum Flow and Minimum Cut -```Python -class KMP: - def __init__(self, pattern): - self.table = None - self.pattern = pattern - self.build_table() - - def build_table(self): - self.table = [-1] + [0] * len(self.pattern) - j = -1 - for i in range(len(self.pattern)): - while j >= 0 and self.pattern[j] != self.pattern[i]: - j = self.table[j] - j += 1 - if i + 1 < len(self.pattern) and self.pattern[j] != self.pattern[i + 1]: - self.table[i + 1] = j - else: - self.table[i + 1] = self.table[j] - - def search(self, text): - i = j = 0 - while i < len(text): - while j >= 0 and text[i] != self.pattern[j]: - j = self.table[j] - i += 1 - j += 1 - if j == len(self.pattern): - return i - j - return -1 -``` \ No newline at end of file diff --git a/Writerside/topics/Data-Structures-and-Algorithms-3.md b/Writerside/topics/Data-Structures-and-Algorithms-3.md index 2955aec..db0dd7e 100644 --- a/Writerside/topics/Data-Structures-and-Algorithms-3.md +++ b/Writerside/topics/Data-Structures-and-Algorithms-3.md @@ -1,5 +1,520 @@ # Data Structures and Algorithms 3 +## 19 Substring Search + +### 19.1 Introduction + + +
  • +

    Goal: Find pattern of length +M in text of length N (typically +N >> M).

    +
  • +
  • +

    Applications: Find & replace, +computer forensics, identify patterns indicative of spam, +electronic surveillance, screen scraping, etc.

    +
  • +
    + +### 19.2 Brute-Force Substring Search + +* Theoretical challenge: Linear-time guarantee. + (Worst case: \sim MN) +* Practical challenge: Avoid backup in text stream. (Brute-force + algorithm needs backup for every mismatch) + +Java + +```Java +public static int search (String pat, String txt) { + int M = pat.length(); + int N = txt.length(); + int i, j; + for (i = 0; i <= N - M; i++) { + for (j = 0; j < M; j++) { + if (txt.charAt(i + j) != pat.charAt(j)) { + break; + } + } + if (j == M) { + return i; + } + } + return N; +} +``` + +Java (Alternate Implementation) + +```Java +/** + * Same sequence of char compares as previous implementation. + *

    + * {@code i} points to end of sequence of already-matched chars + * in text. + *

    + * {@code j} stores number of already-matchedchars (end of + * sequence in pattern). + */ +public static int search(String pat, String txt) { + int i, M = pat.length(); + int j, N = txt.length(); + for (i = 0, j = 0; i < N && j < M; i++) { + if (txt.charAt(i) == pat.charAt(j)) { + j++; + } + else { + i -= j; + j = 0; + } + } + if (j == M) { + return i - M; + } + else { + return N; + } +} +``` + +C++ + +```C++ +int bruteForceSubstringSearch(const std::string& text, const std::string& pattern) { + int n = text.length(); + int m = pattern.length(); + + for (int i = 0; i <= n - m; i++) { + int j; + for (j = 0; j < m; j++) { + if (text[i + j] != pattern[j]) { + break; + } + } + if (j == m) { + return i; + } + } + return -1; +} +``` + +Python + +```Python +def brute_force_search(main_string, sub_string): + len_main = len(main_string) + len_sub = len(sub_string) + + for i in range(len_main - len_sub + 1): + j = 0 + + while(j < len_sub): + if (main_string[i + j] != sub_string[j]): + break + j += 1 + + if (j == len_sub): + return i + + return -1 +``` + +### 19.3 Knuth-Morris-Pratt + +#### 19.3.1 Proposition + +* KMP substring search accesses no more than M + N + chars to search for a pattern of length M in a text of + length N. + +> Proof: Each pattern char accessed once when constructing DFA; +> each text char accessed once (in the worst case) when simulating +> DFA. +> +{style = "tip"} + +* KMP constructs `dfa[][]` in time and space proportional to RM, + where R is the alphabet size and M is the pattern + length. + +> Improved version of KMP constructs `nfa[]` in time and space +> proportional to M. +> +{style = "tip"} + +#### 19.3.2 DFA + +Deterministic Finite State Automaton (DFA) is an abstract +string-search machine. + +* Finite number of states (including start and halt). +* Exactly one transition for each char in alphabet. +* Accept if sequence of transitions lead to halt state. 
+ +Alt text + +DFA state = number of characters in pattern that have been matched (length +of longest prefix of `pat[]` that is a suffix of `txt[0...i]`). + +To compute DFA: If in state j and next char `c != pat.charAt(j)`, +then the last j - 1 characters of input are `pat[1...j - 1]`, +followed by `c`. Simulate `pat[1...j - 1]` on DFA and take transition c. + +For each state j and char `c != pat.charAt(j)`, set `dfa[c][j] = dfa[c][X]`, +then update `X = dfa[pat.charAt(j)][X]`. X is the simulation of `pat[1...j - 1]` on DFA. + +> This is the implementation using DFA. +> +{style = "note"} + +Java (Princeton) + +```Java +public class KMP { + private final int R; // the radix + private final int m; // length of pattern + private final int[][] dfa; // the KMP automaton + + /** + * Preprocesses the pattern string. + * + * @param pat the pattern string + */ + public KMP(String pat) { + this.R = 256; + this.m = pat.length(); + + // build DFA from pattern + dfa = new int[R][m]; + dfa[pat.charAt(0)][0] = 1; + for (int x = 0, j = 1; j < m; j++) { + for (int c = 0; c < R; c++) + dfa[c][j] = dfa[c][x]; // Copy mismatch cases. + dfa[pat.charAt(j)][j] = j+1; // Set match case. + x = dfa[pat.charAt(j)][x]; // Update restart state. + } + } + + /** + * Preprocesses the pattern string. + * + * @param pattern the pattern string + * @param R the alphabet size + */ + public KMP(char[] pattern, int R) { + this.R = R; + this.m = pattern.length; + + // build DFA from pattern + int m = pattern.length; + dfa = new int[R][m]; + dfa[pattern[0]][0] = 1; + for (int x = 0, j = 1; j < m; j++) { + for (int c = 0; c < R; c++) + dfa[c][j] = dfa[c][x]; // Copy mismatch cases. + dfa[pattern[j]][j] = j+1; // Set match case. + x = dfa[pattern[j]][x]; // Update restart state. + } + } + + /** + * Returns the index of the first occurrence of the pattern string + * in the text string. 
+ * + * @param txt the text string + * @return the index of the first occurrence of the pattern string + * in the text string; N if no such match + */ + public int search(String txt) { + + // simulate operation of DFA on text + int n = txt.length(); + int i, j; + for (i = 0, j = 0; i < n && j < m; i++) { + j = dfa[txt.charAt(i)][j]; + } + if (j == m) return i - m; // found + return n; // not found + } + + /** + * Returns the index of the first occurrence of the pattern string + * in the text string. + * + * @param text the text string + * @return the index of the first occurrence of the pattern string + * in the text string; N if no such match + */ + public int search(char[] text) { + + // simulate operation of DFA on text + int n = text.length; + int i, j; + for (i = 0, j = 0; i < n && j < m; i++) { + j = dfa[text[i]][j]; + } + if (j == m) return i - m; // found + return n; // not found + } +} +``` + +C++ + +```C++ +#include +#include + +class KMP { +private: + int R; // the radix + int m; // length of pattern + std::vector> dfa; // the KMP automaton + +public: + // Preprocesses the pattern string. + KMP(std::string pat) { + this->R = 256; + this->m = pat.length(); + + // build DFA from pattern + dfa = std::vector>(R, std::vector(m)); + dfa[pat[0]][0] = 1; + for (int x = 0, j = 1; j < m; j++) { + for (int c = 0; c < R; c++) + dfa[c][j] = dfa[c][x]; // Copy mismatch cases. + dfa[pat[j]][j] = j+1; // Set match case. + x = dfa[pat[j]][x]; // Update restart state. + } + } + + // Returns the index of the first occurrence of the pattern string + // in the text string. 
+ int search(std::string txt) { + + // simulate operation of DFA on text + int n = txt.length(); + int i, j; + for (i = 0, j = 0; i < n && j < m; i++) { + j = dfa[txt[i]][j]; + } + if (j == m) return i - m; // found + return n; // not found + } +}; +``` + +Python + +```Python +class KMP: + def __init__(self, pat): + self.R = 256 # the radix + self.m = len(pat) # length of pattern + + # build DFA from pattern + self.dfa = [[0 for _ in range(self.m)] for _ in range(self.R)] + self.dfa[ord(pat[0])][0] = 1 + x = 0 + for j in range(1, self.m): + for c in range(self.R): + self.dfa[c][j] = self.dfa[c][x] # Copy mismatch cases. + self.dfa[ord(pat[j])][j] = j + 1 # Set match case. + x = self.dfa[ord(pat[j])][x] # Update restart state. + + def search(self, txt): + # simulate operation of DFA on text + n = len(txt) + i, j = 0, 0 + while i < n and j < self.m: + j = self.dfa[ord(txt[i])][j] + i += 1 + if j == self.m: + return i - self.m # found + return n # not found +``` + +#### 19.3.3 NFA + +Example: A B A B A C + +lps: 0 0 1 2 3 0 + +

    Explanation for k = lps[k - 1] +in computePrefixFunction:

    + + +
  • +

    When k reaches 3, q = 5, the position now is C. +The current prefix (also the suffix, without considering C +) is "ABA".

    +

    ABA BA

    +

    AB ABA

    +
  • +
  • +

    Since C is a mismatch for pattern[3] = B +, we need to first find the longest prefix in "ABA" that is +also a suffix.

    +

    ABAB + AC

    +

    AB ABAC +

    +
  • +
  • +

    The longest proper prefix of ABA that is also a suffix is A, + which is given by lps[k - 1] = lps[2] = 1.

    +
  • +
  • +

    At this point, we check again whether C matches +the character that follows this prefix, pattern[1] = B, +which it does not.

    +

    AB + ABAC

    +

    ABAB AC +

    +
  • +
  • +

    The longest prefix and suffix in "A" is "", k = 0, +lps[5] = 0.

    +
  • +
    + +> This is the implementation using NFA. +> +{style = "tip"} + +Java + +```Java +public class KMP { + private int[] computeTemporaryArray(char pattern[]) { + int[] lps = new int[pattern.length]; + int index = 0; + for (int i = 1; i < pattern.length;) { + if (pattern[i] == pattern[index]) { + lps[i] = index + 1; + index++; + i++; + } else { + if (index != 0) { + index = lps[index - 1]; + } else { + lps[i] = 0; + i++; + } + } + } + return lps; + } + + public boolean KMP(char text[], char pattern[]) { + int lps[] = computeTemporaryArray(pattern); + int i = 0; + int j = 0; + while (i < text.length && j < pattern.length) { + if (text[i] == pattern[j]) { + i++; + j++; + } else { + if (j != 0) { + j = lps[j - 1]; + } else { + i++; + } + } + } + if (j == pattern.length) { + return true; + } + return false; + } +} +``` + +C++ + +```C++ +#include +#include + +std::vector computePrefixFunction(const std::string& pattern) { + int m = pattern.length(); + std::vector lps(m); + lps[0] = 0; + + int k = 0; // Length of the longest prefix & suffix + for (int q = 1; q < m; q++) { // q is the position + while (k > 0 && pattern[k] != pattern[q]) + k = lps[k-1]; + + if (pattern[k] == pattern[q]) + k++; + + lps[q] = k; + } + + return lps; +} + +std::vector KMP(const std::string& text, const std::string& pattern) { + int n = text.length(); + int m = pattern.length(); + std::vector longestPrefix = computePrefixFunction(pattern); + std::vector occurrences; + + int q = 0; + for (int i = 0; i < n; i++) { + while (q > 0 && pattern[q] != text[i]) + q = longestPrefix[q-1]; + + if (pattern[q] == text[i]) + q++; + + if (q == m) { + occurrences.push_back(i - m + 1); + q = longestPrefix[q-1]; + } + } + + return occurrences; +} +``` + +Python + +```Python +class KMP: + def __init__(self, pattern): + self.table = None + self.pattern = pattern + self.build_table() + + def build_table(self): + self.table = [-1] + [0] * len(self.pattern) + j = -1 + for i in range(len(self.pattern)): + while j >= 0 
and self.pattern[j] != self.pattern[i]: + j = self.table[j] + j += 1 + if i + 1 < len(self.pattern) and self.pattern[j] != self.pattern[i + 1]: + self.table[i + 1] = j + else: + self.table[i + 1] = self.table[j] + + def search(self, text): + i = j = 0 + while i < len(text): + while j >= 0 and text[i] != self.pattern[j]: + j = self.table[j] + i += 1 + j += 1 + if j == len(self.pattern): + return i - j + return -1 +``` + ## 19 Catalan Number ### 19.1 Properties and Formulas