📌  相关文章
📜  查找字符串(S) 中子串的起始索引,它是通过连接列表 (L) 中的所有单词而制成的

📅  最后修改于: 2021-10-27 07:40:30             🧑  作者: Mango

给定一个字符串S和一个单词列表L ,即字符串数组/向量(列表 L 中的单词的长度都相同)。查找字符串S 中子字符串的起始索引,其中包含列表 L 中存在的所有单词。

列表 L 中的单词在字符串 S 中出现的顺序无关紧要,即如果字符串 S 是“barfooapplefoobar”并且单词列表 (L) 是 [“foo”, “bar”],那么我们必须在字符串 S 中寻找子字符串“foobar”和“barfoo”。

注:表L内的单词可以重复。

例子 :

Input : S: "barfoothefoobarman" 
        L: ["foo", "bar"]                     
Output : 0 9
Explanation : 
// at index 0 : barfoo
// at index 9 : foobar 

Input : S: "catbatatecatatebat"
        L: ["cat", "ate", "bat"] 
Output : 0 3 9
Explanation : 
// at index 0 : catbatate
// at index 3 : batatecat
// at index 9 : catatebat    

Input : S : "abcdababcd"
        L : ["ab", "ab", "cd"]
Output : 0 2 4 
Explanation :
// at index 0 : abcdab
// at index 2 : cdabab
// at index 4 : ababcd

Input : S : "abcdababcd"
        L : ["ab", "ab"]
Output : 4

方法 :
我们可以使用哈希技术来解决上述问题。让我们看看步骤:

  1. 声明一个映射( hash_map ),它存储列表 L 中的每个单词及其在列表 L 中出现的次数。
  2. 遍历字符串S 中所有长度等于 size_L 的子字符串(size_L 是将列表 L 中的所有单词连接起来后产生的字符总数)。
  3. 创建一个临时映射( temp_hash_map )并使用原始映射( hash_map )为每个可能的子字符串初始化它。
  4. 从子字符串中依次提取单词:如果该单词存在于 temp_hash_map 中且其剩余计数大于 0,我们将它的对应计数减 1;如果它不存在于 temp_hash_map 中,或其计数已经为 0(即该单词出现的次数超过了所需次数),我们只需中断。
  5. 遍历子字符串后,我们遍历 temp_hash_map 并查找任何计数 > 0 的键。如果找不到这样的键,则意味着列表 L 中的所有单词都在子字符串中找到,于是我们存储该子字符串的起始索引;如果找到了计数 > 0 的键,则意味着我们没有遍历完整个子字符串,因为中途遇到了一个不在 temp_hash_map 中(或出现次数过多)的词。(下面的实现用一个计数器 count 记录尚未匹配的单词数,count 等于 0 即等价于不存在计数 > 0 的键。)

以下是上述方法的实现:

C++
// CPP program to calculate the starting indices
// of substrings inside S which contains all the
// words present in List L.
#include <cstddef>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>
using namespace std;
  
// Returns the starting indices, in ascending order, of every substring of S
// that is a concatenation (in any order) of all the words in L.
//
// S : text to search.
// L : list of words; the word length is taken from L[0], so all words are
//     expected to have that same length. A word that appears k times in L
//     must appear exactly k times in a matching substring.
//
// An empty vector is returned when L is empty, when the words are empty,
// or when S is shorter than the concatenation of all words.
std::vector<int> findSubstringIndices(std::string S, 
                                      const std::vector<std::string>& L)
{
    // Resultant vector which stores indices.
    std::vector<int> res;

    // Guard before touching L[0]: indexing an empty vector is undefined.
    if (L.empty() || L[0].empty())
        return res;

    // Number of characters of a word in list L.
    const std::size_t size_word = L[0].size();

    // Number of words present inside list L.
    const std::size_t word_count = L.size();

    // Total characters present in list L.
    const std::size_t size_L = size_word * word_count;

    // The concatenation cannot fit inside S at all.
    if (size_L > S.size())
        return res;

    // Maps each word of L to the number of times it occurs in L.
    std::unordered_map<std::string, int> hash_map;
    for (const std::string& word : L)
        ++hash_map[word];

    // Written as i + size_L <= S.size() so no unsigned subtraction is needed.
    for (std::size_t i = 0; i + size_L <= S.size(); i++) {

        // Per-window copy: how many of each word are still required.
        std::unordered_map<std::string, int> temp_hash_map(hash_map);

        // Number of words of L not yet matched in this window.
        std::size_t count = word_count;

        // Walk the window one word-sized chunk at a time.
        for (std::size_t j = i; j < i + size_L; j += size_word) {

            // find() is used instead of operator[] so that an unknown
            // chunk is never inserted into the map.
            auto it = temp_hash_map.find(S.substr(j, size_word));

            // Unknown word, or the word occurs more often than required.
            if (it == temp_hash_map.end() || it->second == 0)
                break;

            --it->second;
            --count;
        }

        // Every word of L was consumed exactly once by this window.
        if (count == 0)
            res.push_back(static_cast<int>(i));
    }

    return res;
}
  
// Driver Code
int main()
{
    string S = "barfoothefoobarman";
    vector L = { "foo", "bar" };
    vector indices = findSubstringIndices(S, L);
    for (int i = 0; i < indices.size(); i++)
        cout << indices[i] << " ";
    return 0;
}


Java
// Java program to calculate the starting indices
// of substrings inside S which contains all the
// words present in List L.
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
  
class GFG 
{
    /**
     * Finds every index in {@code A} at which a substring begins that is a
     * concatenation, in any order, of all the words in {@code B}.
     *
     * <p>The word length is taken from the first element of {@code B}, so
     * all words are expected to share that length. A word that occurs
     * {@code k} times in {@code B} must occur exactly {@code k} times in a
     * matching substring.
     *
     * @param A the text to search
     * @param B the list of equal-length words
     * @return the matching start indices in ascending order; empty when
     *         {@code B} is empty or {@code A} is shorter than the
     *         concatenation of all words
     */
    public static ArrayList<Integer> 
    findSubstring(String A, final List<String> B) 
    {
        // Resultant list which stores indices.
        ArrayList<Integer> res = new ArrayList<>();

        // Guard before B.get(0): an empty list has no word length.
        if (B.isEmpty()) 
        {
            return res;
        }

        // Number of characters of a word in list B.
        int wordLength = B.get(0).length();

        // Number of words present inside list B.
        int wordCount = B.size();

        // Total characters present in list B.
        int totalLength = wordLength * wordCount;

        int n = A.length();

        // The concatenation cannot fit inside A at all.
        if (totalLength > n) 
        {
            return res;
        }

        // Maps each word of B to the number of times it occurs in B.
        HashMap<String, Integer> required = new HashMap<>();
        for (String word : B) 
        {
            required.merge(word, 1, Integer::sum);
        }

        for (int i = 0; i <= n - totalLength; i++) 
        {
            // Per-window copy: how many of each word are still required.
            HashMap<String, Integer> remaining = new HashMap<>(required);

            // Number of words of B not yet matched in this window.
            int count = wordCount;

            // Walk the window one word-sized chunk at a time.
            for (int j = i; j < i + totalLength; j += wordLength) 
            {
                String word = A.substring(j, j + wordLength);
                Integer left = remaining.get(word);

                // Unknown word, or the word occurs more often than required.
                if (left == null || left == 0) 
                {
                    break;
                }

                remaining.put(word, left - 1);
                count--;
            }

            // Every word of B was consumed exactly once by this window.
            if (count == 0) 
            {
                res.add(i);
            }
        }
        return res;
    }

    // Driver code: prints each matching index on its own line.
    public static void main(String[] args) 
    {
        String S = "barfoothefoobarman";
        List<String> L = 
            new ArrayList<>(Arrays.asList("foo", "bar"));
        ArrayList<Integer> indices = findSubstring(S, L);
        for (Integer i : indices) 
        {
            System.out.println(i);
        }
    }
}
  
// This code is contributed by Manish Sakariya


Python3
# Python3 program to calculate the starting indices
# of substrings inside S which contains all the
# words present in List L.
  
# Returns an integer vector consisting of starting
# indices of substrings present inside the string S
def findSubStringIndices(s, L):
    """Return the start indices of substrings of ``s`` built from all words of ``L``.

    A substring matches when it is a concatenation, in any order, of every
    word in ``L``. The word length is taken from ``L[0]``, so all words are
    expected to share that length. A word that occurs k times in ``L`` must
    occur exactly k times in a matching substring.

    Args:
        s: The text to search.
        L: List of equal-length words.

    Returns:
        A list of matching start indices in ascending order. The list is
        empty when ``L`` is empty, when the words are empty, or when ``s``
        is shorter than the concatenation of all words.
    """
    # Resultant list which stores indices.
    res = []

    # Guard before L[0]; also keeps the range() step below non-zero.
    if not L or not L[0]:
        return res

    # Number of characters of a word in list L.
    size_word = len(L[0])

    # Number of words present inside list L.
    word_count = len(L)

    # Total characters present in list L.
    size_L = size_word * word_count

    # The concatenation cannot fit inside s at all.
    if size_L > len(s):
        return res

    # Maps each word of L to the number of times it occurs in L.
    hash_map = {}
    for word in L:
        hash_map[word] = hash_map.get(word, 0) + 1

    for i in range(len(s) - size_L + 1):
        # Per-window copy: how many of each word are still required.
        temp_hash_map = dict(hash_map)

        # Number of words of L not yet matched in this window.
        count = word_count

        # Walk the window one word-sized chunk at a time.
        for j in range(i, i + size_L, size_word):
            word = s[j:j + size_word]

            # Unknown word (get -> 0), or the word occurs more often
            # than required (count already used up).
            if temp_hash_map.get(word, 0) == 0:
                break

            temp_hash_map[word] -= 1
            count -= 1

        # Every word of L was consumed exactly once by this window.
        if count == 0:
            res.append(i)
    return res
  
# Driver Code
if __name__ == "__main__":
    # Sample run: prints the matching start indices separated by spaces.
    text = "barfoothefoobarman"
    words = ["foo", "bar"]
    print(*findSubStringIndices(text, words))
  
# This code is contributed by
# sanjeev2552


输出:

0 9

时间复杂度: O((N – K) * K)
N :字符串S 的长度。
K :如果所有单词都连接起来,则列表 L 的总长度。如果 L : [“ab”, “cd”] 那么 K = 4。

如果您希望与专家一起参加现场课程,请参阅面向在职专业人士的 DSA 现场课程,以及面向学生的竞争性编程现场课程。