Skip to content

Rabin-Karp implementation runs in O(n log n) due to using extended euclidean algorithm for modInverse #95

@BrandonTang89

Description

@BrandonTang89

Considering the following section in /ch6/string_matching.cpp, we call modInverse for every check (we check O(n) possible sub-strings of length m). The comment for hash_fast states it is O(1) but since the extended euclidean algorithm is O(log m), then our total time for string matching is O(n log m).

int extEuclid(int a, int b, int &x, int &y) { // pass x and y by ref
int xx = y = 0;
int yy = x = 1;
while (b) { // repeats until b == 0
int q = a/b;
tie(a, b) = tuple(b, a%b);
tie(x, xx) = tuple(xx, x-q*xx);
tie(y, yy) = tuple(yy, y-q*yy);
}
return a; // returns gcd(a, b)
}
int modInverse(int b, int m) { // returns b^(-1) (mod m)
int x, y;
int d = extEuclid(b, m, x, y); // to get b*x + m*y == d
if (d != 1) return -1; // to indicate failure
// b*x + m*y == 1, now apply (mod m) to get b*x == 1 (mod m)
return (x+m)%m; // this is the answer
}
int hash_fast(int L, int R) { // O(1) hash of any substr
if (L == 0) return h[R]; // h is the prefix hashes
int ans = 0;
ans = ((h[R] - h[L-1]) % M + M) % M; // compute differences
ans = ((ll)ans * modInverse(Pow[L], M)) % M; // remove P[L]^-1 (mod M)
return ans;
}

We can fix this by pre-computing the inverse of each P[i] using the following implementation in O(log m + n) since we only use modInverse once.

class RollingHash {
   public:
    vi P, H;   // P[i] = p^i mod m, H[i] is the hash of prefix length i
    vi P_inv;  // P_inv[i] = p^(-i) mod m
    const int n;
    string T;
    const ll p, M;

    RollingHash(string _s, int _p = 131, int _M = (int)1e9 + 7)
        : n(_s.size()), T(_s), p(_p), M(_M) {
        PrepareP();
        computeRollingHash();
    }
    void PrepareP() {  // precompute P and P_inv
        P.assign(n, 0);
        P[0] = 1;
        for (int i = 1; i < n; i++) P[i] = (P[i - 1] * p) % M;

        P_inv.assign(n, 0);
        P_inv[n - 1] = modInverse(P[n - 1], M);
        for (int i = n - 2; i >= 0; i--) P_inv[i] = (P_inv[i + 1] * p) % M;
    }

    void computeRollingHash() {  // precompute H
        H.assign(n, 0);
        for (int i = 0; i < n; i++) {
            if (i != 0) H[i] = H[i - 1];
            H[i] = (H[i] + ((ll)T[i] * P[i]) % M) % M;
        }
    }

    int getHash(int l, int r) {  // get hash of substring [l, r]
        if (l == 0) return H[r];
        int ans = ((H[r] - H[l - 1]) % M + M) % M;
        ans = ((ll)ans * P_inv[l]) % M;
        return ans;
    }
};

// Returns a vector of indices of all occurrences of pattern in text
vi rabinKarp(string P, string T) {
    RollingHash P_rolling(P);
    RollingHash T_rolling(T);
    vi matches;

    int n = T.size(), m = P.size();
    int p_hash = P_rolling.getHash(0, m - 1);
    for (int i = 0; i <= n - m; i++) {
        if (p_hash == T_rolling.getHash(i, i + m - 1)) {  // match
            matches.push_back(i);
        }
    }
    return matches;
};

Note that using the original code gets TLE for kattis stringmatching while the modified can AC.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions