411. Minimum Unique Word Abbreviation

Description

A string such as "word" contains the following abbreviations:

["word", "1ord", "w1rd", "wo1d", "wor1", "2rd", "w2d", "wo2", "1o1d", "1or1", "w1r1", "1o2", "2r1", "3d", "w3", "4"]

Given a target string and a set of strings in a dictionary, find an abbreviation of this target string with the smallest possible length such that it does not conflict with abbreviations of the strings in the dictionary.

Each number or letter in the abbreviation is considered length = 1. For example, the abbreviation "a32bc" has length = 4.

Note:

  • In the case of multiple answers as shown in the second example below, you may return any one of them.
  • Assume length of target string = m, and dictionary size = n. You may assume that m ≤ 21, n ≤ 1000, and log2(n) + m ≤ 20.

Examples:

"apple", ["blade"] ->  "a4" (because "5" or "4e" conflicts with "blade")

"apple", ["plain", "amber", "blade"] -> "1p3" (other valid answers include "ap3", "a3e", "2p2", "3le", "3l1").

Solution

method 1:

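Generate every abbreviation of the target, then validate each one against all dictionary words that have the same length as the target, keeping the shortest abbreviation that conflicts with none of them.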

method 2:

bitmask + dfs

Details in following code:

class Solution {
    int n, cand, bn, minlen, minab;
    vector<int> dict;

    // Return the length of abbreviation given bit sequence
    int abbrLen(int mask) {
        int count = 0;
        for (int b = 1; b < bn;) {
            if ((mask & b) == 0)
                for (; b < bn and (mask & b) == 0; b <<= 1);
            else b <<= 1;
            count ++;
        }
        return count;
    }

    // DFS backtracking
    void dfs(int bit, int mask) {
        // length of this abbrv mask
        int len = abbrLen(mask);
        if (len >= minlen) return;

        bool match = true;
        /* key logic:
           find a mask, for all the dict, it contains at least one pos that mask is not 0 and dict is not 0.
           this mask is a candidate.
        */ 
        for (auto d : dict) {
            if ((mask & d) == 0) {
                match = false;
                break;
            }
        }


        if (match) {
            minlen = len;
            minab = mask;
        }
        else
            for (int b = bit; b < bn; b <<= 1)
                if (cand & b) dfs(b << 1, mask + b);
    }

public:
    string minAbbreviation(string target, vector<string>& dictionary) {
        n = target.size(), bn = 1 << n, cand = 0, minlen = INT_MAX;
        string res;

        // Preprocessing with bit manipulation

        // get bit mask for every word with same length as target.
        // or all the bitmask together
        for (auto w : dictionary) {
            int word = 0;
            if (w.size() != n) continue;
            for (int i = n-1, bit = 1; i >= 0; --i, bit <<= 1)
                if (target[i] != w[i]) word += bit;
            dict.push_back(word);
            cand |= word;
        }

        /*
            if candidates is 10101:
                the dfs will try
                00001
                00100
                10000
                00101
                10001
                .....
        */

        dfs(1, 0);

        // Reconstruct abbreviation from bit sequence
        for (int i = n-1, pre = i; i >= 0; --i, minab >>= 1) {
            if (minab & 1) {
                if (pre-i > 0) res = to_string(pre-i) + res;
                pre = i - 1;
                res = target[i] + res;
            }
            else if (i == 0) res = to_string(pre-i+1) + res;
        }
        return res;
    }
};

results matching ""

    No results matching ""