Count the Number of Distinct Substrings in a String
Given a string, count all distinct substrings of the given string.
Examples:
Input : abcd
Output : 10
All substrings: abcd abc ab a bcd bc b cd c d
All 10 substrings are distinct.
Input : aaa
Output : 3
All substrings: aaa aa a aa a a
Not all substrings are distinct; only aaa, aa and a are.
Prerequisite : Print subarrays of a given array
The idea is to use hash table (HashSet in Java) to store all generated substrings. Finally we return size of the HashSet.
Implementation:
// C++ program to count all distinct substrings in a string
#include<bits/stdc++.h>
using namespace std;
// Counts the distinct non-empty substrings of `str`.
//
// Every (start, length) slice of the string is inserted into an ordered
// std::set, which discards duplicates; the answer is the size of that set.
// An empty string yields 0.
int distinctSubstring(string str)
{
    set<string> seen;
    const size_t n = str.length();

    for (size_t start = 0; start < n; ++start)
    {
        // Longest slice that still fits when starting at `start`.
        const size_t maxLen = n - start;
        for (size_t len = 1; len <= maxLen; ++len)
            seen.insert(str.substr(start, len));
    }

    return static_cast<int>(seen.size());
}
// Driver Code
int main()
{
    // Sample input: "aaaa" has the distinct substrings a, aa, aaa, aaaa.
    const string sample = "aaaa";
    cout << distinctSubstring(sample);
    return 0;
}
// This code is contributed by Rajput-Ji
// Java program to count all distinct substrings in a string
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
public class DistinctSubstring {
    /**
     * Counts the distinct non-empty substrings of {@code str}.
     *
     * Every slice {@code str.substring(start, end)} is added to a hash set,
     * which drops duplicates; the result is the size of that set.
     *
     * @param str the string to examine
     * @return the number of distinct non-empty substrings (0 for "")
     */
    public static int distinctSubstring(String str)
    {
        final int n = str.length();
        final Set<String> seen = new HashSet<String>();

        for (int start = 0; start < n; start++) {
            int end = start + 1;
            while (end <= n) {
                seen.add(str.substring(start, end));
                end++;
            }
        }
        return seen.size();
    }

    /** Driver: prints the count for a sample string. */
    public static void main(String[] args)
    {
        final String sample = "aaaa";
        System.out.println(distinctSubstring(sample));
    }
}
# Python3 program to count all distinct substrings in a string
def distinctSubstring(str):
    """Return the number of distinct non-empty substrings of ``str``.

    Every slice ``str[start:end]`` is collected into a set, which discards
    duplicates; the answer is the size of that set (0 for an empty string).
    """
    length = len(str)
    unique = {
        str[start:end]
        for start in range(length)
        for end in range(start + 1, length + 1)
    }
    return len(unique)
# Driver Code
if __name__ == '__main__':
    # Named `sample` rather than `str` so the built-in `str` type is not
    # rebound at module scope when the file is run as a script.
    sample = "aaaa"
    print(distinctSubstring(sample))
# This code has been contributed by 29AjayKumar
// C# program to count all distinct
// substrings in a string
using System;
using System.Collections.Generic;
class DistinctSubstring
{
    /// <summary>
    /// Counts the distinct non-empty substrings of <paramref name="str"/>.
    /// Every (start, length) slice is added to a hash set, which drops
    /// duplicates; the result is the size of that set.
    /// </summary>
    /// <param name="str">The string to examine.</param>
    /// <returns>The number of distinct non-empty substrings (0 for "").</returns>
    public static int distinctSubstring(String str)
    {
        var seen = new HashSet<String>();
        int n = str.Length;

        for (int start = 0; start < n; start++)
        {
            // Grow the slice one character at a time from the current start.
            for (int len = 1; start + len <= n; len++)
            {
                seen.Add(str.Substring(start, len));
            }
        }

        return seen.Count;
    }

    // Driver: prints the count for a sample string.
    public static void Main(String[] args)
    {
        String sample = "aaaa";
        Console.WriteLine(distinctSubstring(sample));
    }
}
// This code is contributed by 29AjayKumar
/**
 * Counts the distinct non-empty substrings of `str`.
 *
 * Every slice [start, end) is added to a Set, which drops duplicates;
 * the result is the size of that Set (0 for an empty string).
 *
 * @param {string} str - The string to examine.
 * @returns {number} Number of distinct non-empty substrings.
 */
function distinctSubstring(str) {
  const seen = new Set();
  const n = str.length;
  for (let start = 0; start < n; start++) {
    for (let end = start + 1; end <= n; end++) {
      seen.add(str.slice(start, end));
    }
  }
  return seen.size;
}
// Driver code: count the distinct substrings of a sample string and log
// the result to the console.
const str = "aaaa";
console.log(distinctSubstring(str));
Output
4
Complexity Analysis:
- Time Complexity: O(n^3 log n)
- Auxiliary Space: O(n^3) in the worst case, since the set can hold O(n^2) distinct substrings, each up to n characters long.
How to print the distinct substrings?
// C++ program to count all distinct
// substrings in a string
#include <bits/stdc++.h>
using namespace std;
// Returns the set of all distinct non-empty substrings of `str`.
//
// The loops enumerate half-open index ranges [i, j). std::string::substr
// takes (position, count) rather than (begin, end), so the count passed
// for such a range must be j - i. Passing j itself over-extends every
// slice that does not start at index 0 and drops substrings such as "b"
// and "c" of "abcd".
set<string> distinctSubstring(string str)
{
    set<string> result;
    const size_t n = str.length();

    for (size_t i = 0; i < n; i++)
    {
        for (size_t j = i + 1; j <= n; j++)
        {
            result.insert(str.substr(i, j - i));
        }
    }

    return result;
}
// Driver code
int main()
{
    const string input = "aaaa";
    const set<string> substrings = distinctSubstring(input);

    // std::set iterates in sorted order, so the listing is lexicographic.
    cout << "Distinct Substrings are: \n";
    for (const string& sub : substrings)
    {
        cout << sub << endl;
    }
    return 0;
}
// This code is contributed by Ronak Mangal
// Java program to count all distinct substrings in a string
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
public class DistinctSubstring {
    /**
     * Collects the distinct non-empty substrings of {@code str}.
     *
     * @param str the string to examine
     * @return a hash set holding each distinct substring once; empty for ""
     */
    public static Set<String> distinctSubstring(String str)
    {
        final Set<String> unique = new HashSet<String>();
        final int n = str.length();

        int start = 0;
        while (start < n) {
            // Every end index past `start` gives one slice beginning there.
            for (int end = start + 1; end <= n; end++) {
                unique.add(str.substring(start, end));
            }
            start++;
        }
        return unique;
    }

    /** Driver: prints each distinct substring of a sample string on its own line. */
    public static void main(String[] args)
    {
        final String sample = "aaaa";
        final Set<String> found = distinctSubstring(sample);
        System.out.println("Distinct Substrings are: ");
        for (String sub : found) {
            System.out.println(sub);
        }
    }
}
# Python3 program to count all distinct
# substrings in a string
def distinctSubstring(str):
    """Return the set of distinct non-empty substrings of ``str``.

    Every slice ``str[start:end]`` is collected into a set, so each
    substring appears once no matter how many times it occurs.
    """
    length = len(str)
    return {
        str[start:end]
        for start in range(length)
        for end in range(start + 1, length + 1)
    }
# Driver Code
if __name__ == '__main__':
    # Named `sample` rather than `str` so the built-in `str` type is not
    # rebound at module scope when the file is run as a script.
    sample = "aaaa"
    subs = distinctSubstring(sample)
    print("Distinct Substrings are: ")
    # Sets are unordered, so the listing order is not guaranteed.
    for s in subs:
        print(s)
# This code is contributed by 29AjayKumar
// C# program to count all distinct
// substrings in a string
using System;
using System.Collections.Generic;
class GFG
{
    /// <summary>
    /// Collects the distinct non-empty substrings of <paramref name="str"/>.
    /// </summary>
    /// <param name="str">The string to examine.</param>
    /// <returns>A hash set holding each distinct substring once; empty for "".</returns>
    public static HashSet<String> distinctSubstring(String str)
    {
        var unique = new HashSet<String>();
        int n = str.Length;

        for (int start = 0; start < n; start++)
        {
            // Substring takes (startIndex, length), so grow the length
            // until the slice reaches the end of the string.
            for (int len = 1; start + len <= n; len++)
            {
                unique.Add(str.Substring(start, len));
            }
        }

        return unique;
    }

    // Driver: prints each distinct substring of a sample string on its own line.
    public static void Main(String[] args)
    {
        String sample = "aaaa";
        HashSet<String> found = distinctSubstring(sample);
        Console.WriteLine("Distinct Substrings are: ");
        foreach (String sub in found)
        {
            Console.WriteLine(sub);
        }
    }
}
// This code is contributed by 29AjayKumar
/**
 * Collects the distinct non-empty substrings of `str`.
 *
 * @param {string} str - The string to examine.
 * @returns {Set<string>} Each distinct substring once, in first-seen order.
 */
function distinctSubstring(str) {
  const unique = new Set();
  const n = str.length;
  for (let start = 0; start < n; start++) {
    // Every end index past `start` gives one slice beginning there.
    for (let end = start + 1; end <= n; end++) {
      unique.add(str.slice(start, end));
    }
  }
  return unique;
}
// Driver code: collect the distinct substrings of a sample string and log
// each one on its own line, after a header line.
const str = "aaaa";
const subs = distinctSubstring(str);
console.log("Distinct Substrings are: ");
subs.forEach(substring => {
console.log(substring);
});
Output
Distinct Substrings are: a aa aaa aaaa
Complexity Analysis:
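- Time Complexity: O(n^3 log n)
- Auxiliary Space: O(n^3) in the worst case, since the set can hold O(n^2) distinct substrings, each up to n characters long.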
Optimization: We can further optimize the above code. The substr() function runs in linear time. Instead of calling it for every pair of indices, we can append the current character to the previous substring to obtain the current substring.
Implementation:
// C++ implementation of the approach
#include <bits/stdc++.h>
using namespace std;
// Prints every distinct non-empty substring of `s` to standard output,
// each followed by a single space. Nothing is returned.
//
// The substrings are kept in an unordered_set, so the order in which
// they are printed is unspecified.
void printSubstrings(string s)
{
    unordered_set<string> seen;
    const size_t n = s.size();

    for (size_t start = 0; start < n; ++start) {
        // Extend the previous substring by one character instead of
        // re-slicing the source string for every end position.
        string current;
        for (size_t end = start; end < n; ++end) {
            current.push_back(s[end]);
            seen.insert(current);
        }
    }

    for (const string& sub : seen)
        cout << sub << " ";
}
// Driver code
int main()
{
    // Sample input with repeated letters, so some substrings occur twice.
    const string sample = "aaabc";
    printSubstrings(sample);
    return 0;
}
// Java implementation of the approach
import java.util.*;
class GFG
{
    /**
     * Prints every distinct non-empty substring of {@code s} to standard
     * output, each followed by a single space. Nothing is returned.
     *
     * The substrings are kept in a HashSet, so the print order follows the
     * set's iteration order rather than the order of discovery.
     */
    static void printSubStrings(String s)
    {
        final HashSet<String> seen = new HashSet<String>();
        final int n = s.length();

        for (int start = 0; start < n; ++start)
        {
            // Grow the current substring one character at a time instead
            // of re-slicing the source string for every end position.
            final StringBuilder current = new StringBuilder();
            for (int end = start; end < n; ++end)
            {
                current.append(s.charAt(end));
                seen.add(current.toString());
            }
        }

        for (String sub : seen)
            System.out.print(sub + " ");
    }

    /** Driver: prints the distinct substrings of a sample string. */
    public static void main(String[] args)
    {
        final String sample = "aaabc";
        printSubStrings(sample);
    }
}
// This code is contributed by Rajput-Ji
# Python3 implementation of the approach
def printSubStrings(s):
    """Print every distinct non-empty substring of ``s``.

    Each substring is written to standard output followed by a single
    space; nothing is returned. The substrings are held in a set, so the
    order in which they are printed is not guaranteed.
    """
    seen = set()
    for start in range(len(s)):
        # Extend the previous substring by one character instead of
        # re-slicing the source string for every end position.
        prefix = ""
        for ch in s[start:]:
            prefix += ch
            seen.add(prefix)
    for sub in seen:
        print(sub, end=" ")
# Driver code
if __name__ == '__main__':
    # Named `sample` rather than `str` so the built-in `str` type is not
    # rebound at module scope when the file is run as a script.
    sample = "aaabc"
    printSubStrings(sample)
# This code is contributed by 29AjayKumar
// C# implementation of the approach
using System;
using System.Collections.Generic;
class GFG
{
    /// <summary>
    /// Prints every distinct non-empty substring of <paramref name="s"/>
    /// to the console, each followed by a single space. Nothing is returned.
    /// </summary>
    /// <param name="s">The string whose substrings are printed.</param>
    static void printSubStrings(String s)
    {
        var seen = new HashSet<String>();
        int n = s.Length;

        for (int start = 0; start < n; ++start)
        {
            // Extend the previous substring by one character instead of
            // re-slicing the source string for every end position.
            String current = "";
            for (int end = start; end < n; ++end)
            {
                current += s[end];
                seen.Add(current);
            }
        }

        foreach (String sub in seen)
        {
            Console.Write(sub + " ");
        }
    }

    // Driver: prints the distinct substrings of a sample string.
    public static void Main(String[] args)
    {
        String sample = "aaabc";
        printSubStrings(sample);
    }
}
// This code is contributed by Rajput-Ji
/**
 * Logs every distinct non-empty substring of `s`, one per console line.
 * Nothing is returned.
 *
 * A Set iterates in insertion order, so substrings appear in the order
 * they were first generated.
 *
 * @param {string} s - The string whose substrings are logged.
 */
function printSubstrings(s) {
  const seen = new Set();
  const n = s.length;

  for (let start = 0; start < n; start++) {
    // Extend the previous substring by one character instead of
    // re-slicing the source string for every end position.
    let current = "";
    for (let end = start; end < n; end++) {
      current = current + s[end];
      seen.add(current);
    }
  }

  for (const sub of seen) {
    console.log(sub);
  }
}
// Driver code: log the distinct substrings of a sample string that
// contains repeated letters.
const str = "aaabc";
printSubstrings(str);
Output
bc b abc ab aabc aa aaa c a aaab aab aaabc
Complexity Analysis:
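- Time Complexity: O(n^2) substring insertions
- Auxiliary Space: O(n^3) in the worst case, because the set still stores a copy of every distinct substring.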
Space Optimization using Trie Data Structure (when we just need count of distinct substrings)
The approaches above rely on hashing, which may exceed the memory limit (MLE) for very large strings. Their space complexity is roughly O(n^3): there can be n(n+1)/2 substrings, which is O(n^2), and each substring has a length between 1 and n, i.e. O(n/2) on average. Together this gives O(n^3) total space.
We can improve this using a Trie. The idea is to insert only the characters that are not already present in the Trie. Whenever such an insertion happens, we know that this substring is occurring for the first time, so we count it. If some characters of the substring are already present, we simply move on to the next node without storing them again, which saves space.
The time complexity of this approach is O(n^2), similar to the previous approach, but the space reduces to one trie node per distinct substring, i.e. O(n^2) nodes with 26 child pointers each, instead of a separate copy of every substring.
Implementation:
#include <bits/stdc++.h>
using namespace std;
// Node of a trie over the lowercase letters 'a'..'z'.
// child[c] is the node reached by appending the letter 'a' + c, or
// nullptr when that edge has not been created yet.
class TrieNode {
public:
    bool isWord;
    TrieNode* child[26];
    TrieNode()
    {
        isWord = false;
        for (int i = 0; i < 26; i++) {
            child[i] = nullptr;
        }
    }
};

// Deletes every node of the trie rooted at `root`.
// Uses an explicit stack instead of recursion because the trie is as
// deep as the input string is long.
static void freeTrie(TrieNode* root)
{
    vector<TrieNode*> pending;
    pending.push_back(root);
    while (!pending.empty()) {
        TrieNode* node = pending.back();
        pending.pop_back();
        for (int c = 0; c < 26; c++) {
            if (node->child[c] != nullptr) {
                pending.push_back(node->child[c]);
            }
        }
        delete node;
    }
}

// Counts the distinct non-empty substrings of `str` using a trie.
//
// Every suffix of `str` is walked from the trie root. Each node that has
// to be created corresponds to a substring seen for the first time, so
// the number of created nodes is the answer. The empty substring is not
// counted.
//
// Throws std::invalid_argument if `str` contains a character outside
// 'a'..'z', because such a character would index outside child[26].
int countDistinctSubstring(string str)
{
    // Validate before allocating anything so a rejected input leaks nothing.
    for (char ch : str) {
        if (ch < 'a' || ch > 'z') {
            throw invalid_argument(
                "countDistinctSubstring: input must contain only 'a'..'z'");
        }
    }

    TrieNode* head = new TrieNode();
    // will hold the count of unique substrings
    int count = 0;
    const size_t n = str.length();

    for (size_t i = 0; i < n; i++) {
        TrieNode* temp = head;
        for (size_t j = i; j < n; j++) {
            const int idx = str[j] - 'a';
            // when char not present add it to the trie
            if (temp->child[idx] == nullptr) {
                temp->child[idx] = new TrieNode();
                // The new node, not its parent, is the end of the
                // substring str[i..j] that was just discovered.
                temp->child[idx]->isWord = true;
                count++;
            }
            // move on to the next char
            temp = temp->child[idx];
        }
    }

    // The trie is only scratch space for this call; release it.
    freeTrie(head);
    return count;
}
int main()
{
    // Sample input with repeated letters.
    const int total = countDistinctSubstring("aaabc");
    cout << "Count of Distinct Substrings: " << total << endl;
    return 0;
}
/*package whatever //do not write package name here */
import java.io.*;
class GFG {
    /** Node of a trie over the lowercase letters 'a'..'z'. */
    static class TrieNode {
        // children[c] is the node reached by appending the letter 'a' + c,
        // or null when that edge has not been created yet.
        TrieNode children[];
        boolean isEnd;
        TrieNode()
        {
            this.children = new TrieNode[26];
            this.isEnd = false;
        }
    }

    // Shared trie populated by insert(). distinctSubstringCount() does not
    // use it, so counting never depends on earlier calls.
    static TrieNode root = new TrieNode();

    /**
     * Inserts {@code str} into the shared trie rooted at {@link #root} and
     * marks the node of its last character as a word end.
     */
    static void insert(String str)
    {
        TrieNode cur = root;
        for (char ch : str.toCharArray()) {
            int idx = ch - 'a';
            if (cur.children[idx] == null)
                cur.children[idx] = new TrieNode();
            cur = cur.children[idx];
        }
        cur.isEnd = true;
    }

    /**
     * Counts the distinct non-empty substrings of {@code str} using a trie.
     *
     * Every suffix of {@code str} is walked from the root of a trie that
     * is created fresh for this call. Each node that has to be created
     * corresponds to a substring seen for the first time, so the number
     * of created nodes is the answer. Building on the shared static root
     * instead would make repeated calls undercount.
     *
     * @param str the string to examine; only 'a'..'z' is supported
     * @return the number of distinct non-empty substrings (0 for "")
     * @throws ArrayIndexOutOfBoundsException if {@code str} contains a
     *         character outside 'a'..'z'
     */
    public static int distinctSubstringCount(String str)
    {
        // Per-call trie: keeps the result independent of previous calls.
        TrieNode localRoot = new TrieNode();
        // will hold the count of unique substrings
        int cnt = 0;
        final int n = str.length();
        for (int i = 0; i < n; i++) {
            // start from root of trie each time as new starting point
            TrieNode temp = localRoot;
            for (int j = i; j < n; j++) {
                int idx = str.charAt(j) - 'a';
                // when char not present add it to the trie
                if (temp.children[idx] == null) {
                    temp.children[idx] = new TrieNode();
                    // The new node, not its parent, ends the substring
                    // str[i..j] that was just discovered.
                    temp.children[idx].isEnd = true;
                    cnt++;
                }
                // move on to the next char
                temp = temp.children[idx];
            }
        }
        return cnt;
    }

    /** Driver: prints the count for a sample string. */
    public static void main(String[] args)
    {
        int cnt = distinctSubstringCount("aaabc");
        System.out.println("Count of distinct substrings: "
                           + cnt);
    }
}
# Python code for the above approach:
class TrieNode:
    """Node of a trie over the lowercase letters 'a'..'z'."""

    def __init__(self):
        self.isWord = False
        # child[c] is the node reached by appending chr(ord('a') + c),
        # or None when that edge has not been created yet.
        self.child = [None] * 26


def countDistinctSubstring(string):
    """Return the number of distinct non-empty substrings of ``string``.

    Every suffix is walked from the trie root. Each node that has to be
    created corresponds to a substring seen for the first time, so the
    number of created nodes is the answer. The empty substring is not
    counted.

    Raises:
        ValueError: if ``string`` contains a character outside 'a'..'z'.
            Without this check a negative offset would wrap around the
            26-slot child list and silently merge different characters.
    """
    base = ord('a')
    head = TrieNode()
    # will hold the count of unique substrings
    count = 0
    for i in range(len(string)):
        temp = head
        for j in range(i, len(string)):
            idx = ord(string[j]) - base
            if not 0 <= idx < 26:
                raise ValueError(
                    "countDistinctSubstring supports only 'a'..'z', got %r"
                    % string[j]
                )
            # when char not present add it to the trie
            if temp.child[idx] is None:
                temp.child[idx] = TrieNode()
                # The new node, not its parent, ends the substring
                # string[i:j + 1] that was just discovered.
                temp.child[idx].isWord = True
                count += 1
            # move on to the next char
            temp = temp.child[idx]
    return count
# Driver code: there is no __main__ guard, so these statements also run
# when the file is imported. Prints the count for a sample string.
count = countDistinctSubstring("aaabc")
print("Count of Distinct Substrings:", count)
# This code is contributed by lokesh.
using System;
using System.Collections.Generic;
class GFG {
    // Node of a trie over the lowercase letters 'a'..'z'.
    class TrieNode {
        public bool isWord;
        // child[c] is the node reached by appending the letter 'a' + c,
        // or null when that edge has not been created yet.
        public TrieNode[] child = new TrieNode[26];
        public TrieNode()
        {
            isWord = false;
            // Explicitly reset every slot to null.
            Array.Clear(child, 0, child.Length);
        }
    }

    // Counts the distinct non-empty substrings of str using a trie.
    // Every suffix is walked from the root; each node that has to be
    // created corresponds to a substring seen for the first time, so the
    // number of created nodes is returned.
    static int countDistinctSubstring(string str)
    {
        var root = new TrieNode();
        int created = 0;
        int n = str.Length;

        for (int start = 0; start < n; start++) {
            TrieNode node = root;
            for (int pos = start; pos < n; pos++)
            {
                int slot = str[pos] - 'a';
                TrieNode next = node.child[slot];
                if (next == null) {
                    next = new TrieNode();
                    node.child[slot] = next;
                    // NOTE(review): the flag is set on the parent node,
                    // not the new one - confirm this is intended.
                    node.isWord = true;
                    created++;
                }
                node = next;
            }
        }
        return created;
    }

    // Driver: prints the count for a sample string.
    public static void Main()
    {
        int total = countDistinctSubstring("aaabc");
        Console.Write("Count of Distinct Substrings: " + total);
    }
}
// This code is contributed by poojaagarwal2.
/** Node of a trie over the lowercase letters 'a'..'z'. */
class TrieNode {
  constructor() {
    this.isWord = false;
    // child[c] is the node reached by appending the letter 'a' + c,
    // or null when that edge has not been created yet.
    this.child = new Array(26).fill(null);
  }
}

/**
 * Counts the distinct non-empty substrings of `str` using a trie.
 *
 * Every suffix is walked from the trie root. Each node that has to be
 * created corresponds to a substring seen for the first time, so the
 * number of created nodes is the answer.
 *
 * @param {string} str - Input consisting only of 'a'..'z'.
 * @returns {number} Number of distinct non-empty substrings (0 for "").
 * @throws {RangeError} If `str` contains a character outside 'a'..'z'.
 *   Without this check an out-of-range index reads `undefined`, which is
 *   not `=== null`, so the character would be silently skipped.
 */
function countDistinctSubstring(str) {
  const base = 'a'.charCodeAt(0);

  // Validate up front so an unsupported character can never reach the
  // child-array lookups below.
  for (let k = 0; k < str.length; k++) {
    const offset = str.charCodeAt(k) - base;
    if (offset < 0 || offset >= 26) {
      throw new RangeError(
        `countDistinctSubstring supports only 'a'-'z'; bad character at index ${k}`
      );
    }
  }

  const head = new TrieNode();
  let count = 0;
  for (let i = 0; i < str.length; i++) {
    let temp = head;
    for (let j = i; j < str.length; j++) {
      const idx = str.charCodeAt(j) - base;
      if (temp.child[idx] === null) {
        temp.child[idx] = new TrieNode();
        // The new node, not its parent, ends the substring that was
        // just discovered.
        temp.child[idx].isWord = true;
        count++;
      }
      temp = temp.child[idx];
    }
  }
  return count;
}
// Driver code: log the count for a sample string.
console.log("Count of Distinct Substrings: " + countDistinctSubstring("aaabc"));
Output
Count of Distinct Substrings: 12
Complexity Analysis:
- Time Complexity: O(n^2)
- Auxiliary Space: O(n^2)