题目:You are given a string, s, and a list of words, words, that are all of the same length. Find all starting indices of substring(s) in s that is a concatenation of each word in words exactly once and without any intervening characters.
For example, given: s: “barfoothefoobarman” words: [“foo”, “bar”]
You should return the indices: [0,9]. (order does not matter).
题目就是为了获得s中包含words所有词的子串的索引位置。首先我们可以想到一种最简单的思路就是把words中每个单词以及它们出现的次数保存在Map中,然后遍历s,对每个位置都进行判断其子串是否包含了Map中的所有单词。代码入下:
public static List<Integer> findSubstring(String s, String[] words) {
List<Integer> res = new ArrayList<>();
if(s == null || words.length == 0)
return res;
HashMap<String, Integer> map = new HashMap<>();
//将words的信息保存到Map中
for(int i=0; i<words.length; i++)
map.put(words[i], map.getOrDefault(words[i], 0)+1);
int len = words[0].length();
//子串长度为word.length*len, 然后遍历s时就只需要遍历前length位就行了
int length = s.length() - words.length * len;
for(int i=0; i<=length; i++){
//注意这里不能直接使用map,这样会在dfs函数中把map清空,应该是用其复制才能保证map保存的信息不变
if(dfs(s, (Map)map.clone(), i, len))
res.add(i);
}
return res;
}
public static boolean dfs(String s, Map<String, Integer> map, int start_idx, int len){
//直到map清空才说明成功
if(map.isEmpty())
return true;
String ss = s.substring(start_idx, start_idx+len);
if(map.containsKey(ss)){
int count = map.get(ss);
count --;
if(count == 0)
map.remove(ss);
else
map.put(ss, count);
return dfs(s, map, start_idx+len, len);
}
return false;
}
但是这种方法对s字符串遍历时进行了过多的重复操作,而且每次调用函数都会对map进行复制,造成极大浪费,所以导致运行超时。那么我们该如何改进呢,我们首先想到把dfs函数拆解开,这样就可以避免对map的重复拷贝可以节省很多时间。这种方法击败了26%的用户。
public static List<Integer> findSubstring2(String S, String[] L) {
List<Integer> res = new ArrayList<Integer>();
if (S == null || L == null || L.length == 0) return res;
int len = L[0].length(); // length of each word
Map<String, Integer> map = new HashMap<String, Integer>(); // map for L
for (String w : L) map.put(w, map.containsKey(w) ? map.get(w) + 1 : 1);
for (int i = 0; i <= S.length() - len * L.length; i++) {
Map<String, Integer> copy = new HashMap<String, Integer>(map);
for (int j = 0; j < L.length; j++) { // checkc if match
String str = S.substring(i + j*len, i + j*len + len); // next word
if (copy.containsKey(str)) { // is in remaining words
int count = copy.get(str);
if (count == 1) copy.remove(str);
else copy.put(str, count - 1);
if (copy.isEmpty()) { // matches
res.add(i);
break;
}
} else break; // not in L
}
}
return res;
}
还可以如何改进呢,在网上找打了下面两种做法,分别击败了75%和97%的用户。
public static List<Integer> findSubstring1(String s, String[] words) {
List<Integer> res = new ArrayList<>();
if(words == null || words.length == 0 || s.length() == 0) return res;
int wordLen = words[0].length();
int numWord = words.length;
int windowLen = wordLen * numWord;
int sLen = s.length();
HashMap<String, Integer> map = new HashMap<>();
for(String word : words) map.put(word, map.getOrDefault(word, 0) + 1);
for(int i = 0; i < wordLen; i++) { // Run wordLen scans
HashMap<String, Integer> curMap = new HashMap<>();
for(int j = i, count = 0, start = i; j + wordLen <= sLen; j += wordLen) { // Move window in step of wordLen
// count: number of exceeded occurences in current window
// start: start index of current window of size windowLen
if(start + windowLen > sLen) break;
String word = s.substring(j, j + wordLen);
if(!map.containsKey(word)) {
curMap.clear();
count = 0;
start = j + wordLen;
}
else {
if(j == start + windowLen) { // Remove previous word of current window
String preWord = s.substring(start, start + wordLen);
start += wordLen;
int val = curMap.get(preWord);
if(val == 1) curMap.remove(preWord);
else curMap.put(preWord, val - 1);
if(val - 1 >= map.get(preWord)) count--; // Reduce count of exceeded word
}
// Add new word
curMap.put(word, curMap.getOrDefault(word, 0) + 1);
if(curMap.get(word) > map.get(word)) count++; // More than expected, increase count
// Check if current window valid
if(count == 0 && start + windowLen == j + wordLen) {
res.add(start);
}
}
}
}
return res;
}
方法二:
public List<Integer> findSubstring(String s, String[] words) {
/**
* Let n=s.length, k=words[0].length traverse s with indices i, i+k,
* i+2k, ... for 0<=i<k, so that the time complexity is O(n).
*/
List<Integer> res = new ArrayList<Integer>();
int n = s.length(), m = words.length, k;
if (n == 0 || m == 0 || (k = words[0].length()) == 0)
return res;
HashMap<String, Integer> wDict = new HashMap<String, Integer>();
for (String word : words) {
if (wDict.containsKey(word))
wDict.put(word, wDict.get(word) + 1);
else
wDict.put(word, 1);
}
int i, j, start, x, wordsLen = m * k;
HashMap<String, Integer> curDict = new HashMap<String, Integer>();
String test, temp;
for (i = 0; i < k; i++) {
curDict.clear();
start = i;
if (start + wordsLen > n)
return res;
for (j = i; j + k <= n; j += k) {
test = s.substring(j, j + k);
if (wDict.containsKey(test)) {
if (!curDict.containsKey(test)) {
curDict.put(test, 1);
start = checkFound(res, start, wordsLen, j, k, curDict, s);
continue;
}
// curDict.containsKey(test)
x = curDict.get(test);
if (x < wDict.get(test)) {
curDict.put(test, x + 1);
start = checkFound(res, start, wordsLen, j, k, curDict, s);
continue;
}
// curDict.get(test)==wDict.get(test), slide start to
// the next word of the first same word as test
while (!(temp = s.substring(start, start + k)).equals(test)) {
decreaseCount(curDict, temp);
start += k;
}
start += k;
if (start + wordsLen > n)
break;
continue;
}
// totally failed up to index j+k, slide start and reset all
start = j + k;
if (start + wordsLen > n)
break;
curDict.clear();
}
}
return res;
}
public int checkFound(List<Integer> res, int start, int wordsLen, int j, int k,
HashMap<String, Integer> curDict, String s) {
if (start + wordsLen == j + k) {
res.add(start);
// slide start to the next word
decreaseCount(curDict, s.substring(start, start + k));
return start + k;
}
return start;
}
public void decreaseCount(HashMap<String, Integer> curDict, String key) {
// remove key if curDict.get(key)==1, otherwise decrease it by 1
int x = curDict.get(key);
if (x == 1)
curDict.remove(key);
else
curDict.put(key, x - 1);
}