给定其中包含各种HTML实体的字符串str ,任务是将这些实体替换为其相应的特殊字符。
An HTML entity parser takes HTML code as input and replaces every entity of a special character with the character itself. For example, for the quotation mark the entity is &quot; and the corresponding character is ".
以下是HTML实体及其相应的特殊字符,如下表所示:
Name/ Description | HTML Entity | Special Character |
---|---|---|
Space | &nbsp; | (space) |
Ampersand | &amp; | & |
Greater than | &gt; | > |
Less than | &lt; | < |
Single Quotation Mark | &apos; | ' |
Double Quotation Mark | &quot; | " |
Registered Trademark | &reg; | ® |
Copyright mark | &copy; | © |
Forward Slash | &frasl; | ⁄ |
例子:
Input: str = “17 &gt; 25 and 25 &lt; 17”
Output: 17 > 25 and 25 < 17
Explanation: In the above example &gt; is
replaced by the corresponding special character
> and &lt; is replaced by <
Input: str = “&copy; is symbol of copyright”
Output: © is symbol of copyright
Explanation: In the above example &copy; is
replaced by the corresponding special character
©
方法1 –使用unordered_map:以下是步骤:
- 将HTML实体及其对应的字符存储在Map中。
- 遍历给定的字符串,如果遇到任何字符“&” ,则查找在此“&”号之后存在哪个HTML实体。
- 将该实体对应的字符追加到输出字符串中。
- 打印输出字符串作为结果。
下面是上述方法的实现:
C++
// C++ program for the above approach
#include <iostream>
#include <string>
#include <unordered_map>
using namespace std;

// Decodes a fixed set of named HTML entities (for example "&gt;") into the
// characters they stand for, using a hash map from entity to replacement.
class GfG {
public:
    // Entity -> replacement text. Every key includes the leading '&' and the
    // trailing ';'.
    unordered_map<string, string> m;

public:
    // Load the table on construction so parseInputString() also works when
    // the caller never calls initializeMap() explicitly.
    GfG() { initializeMap(); }

    // Associates each supported HTML entity with its special character.
    // Safe to call more than once; existing entries are simply overwritten.
    void initializeMap()
    {
        m["&quot;"] = "\"";
        m["&apos;"] = "'";
        m["&amp;"] = "&";
        m["&gt;"] = ">";
        m["&lt;"] = "<";
        m["&frasl;"] = "/";
        m["&nbsp;"] = " ";
        m["&reg;"] = "®";
        m["&copy;"] = "©";
    }

public:
    // Returns a copy of `input` in which every entity found in `m` has been
    // replaced by its mapped text. Text that is not a known entity (a lone
    // '&', an unknown "&name;", an unterminated "&gt") is copied unchanged.
    // The input is scanned once, left to right, so decoded output is never
    // decoded a second time ("&amp;gt;" becomes "&gt;", not ">").
    string parseInputString(const string& input)
    {
        // The longest key bounds how far ahead we have to look for the
        // closing ';', which keeps the scan linear in the input size.
        size_t longestEntity = 0;
        for (const auto& entry : m) {
            if (entry.first.size() > longestEntity) {
                longestEntity = entry.first.size();
            }
        }

        string output;
        output.reserve(input.size());

        size_t i = 0;
        while (i < input.size()) {
            if (input[i] == '&') {
                // Number of characters, starting at i, that may belong to
                // an entity.
                size_t limit = input.size() - i;
                if (limit > longestEntity) {
                    limit = longestEntity;
                }

                // Offset of the first ';' after the '&', if it is in range.
                size_t offset = 1;
                while (offset < limit && input[i + offset] != ';') {
                    ++offset;
                }

                if (offset < limit) {
                    const auto found = m.find(input.substr(i, offset + 1));
                    if (found != m.end()) {
                        output += found->second;
                        i += offset + 1;
                        continue;
                    }
                }
                // Not an entity: fall through and copy the '&' itself, so
                // that an entity starting later is still recognised.
            }
            output += input[i];
            ++i;
        }

        // Return the parsed string
        return output;
    }
};
// Driver Code
// Demonstrates the map-based parser on a string containing two entities.
int main()
{
    // The entities are spelled out in full; the parser turns them into
    // '>' and '<'.
    string input = "17 &gt; 25 and 25 &lt; 17";
    GfG g;
    // Load the entity table before parsing
    g.initializeMap();
    // Prints: 17 > 25 and 25 < 17
    cout << g.parseInputString(input);
    return 0;
}
C++
// C++ program to Parse the HTML Entities
#include <iostream>
#include <string>
using namespace std;

// Decodes named HTML entities by matching the text that follows each '&'
// against a fixed list of patterns.
class GfG {
public:
    // Returns a copy of `input` in which every supported entity has been
    // replaced by its special character. Anything else, including a lone
    // '&' or an unknown "&name;", is copied unchanged. The input is scanned
    // once, left to right, so "&amp;gt;" yields "&gt;" rather than ">".
    string parseInputString(const string& input)
    {
        struct EntityPattern {
            string entity;       // text to look for, e.g. "&gt;"
            string replacement;  // text written in its place
        };
        static const EntityPattern patterns[] = {
            { "&quot;", "\"" },
            { "&apos;", "'" },
            { "&amp;", "&" },
            { "&gt;", ">" },
            { "&lt;", "<" },
            { "&frasl;", "/" },
            { "&nbsp;", " " },
            { "&reg;", "®" },
            { "&copy;", "©" },
        };

        // To store parsed string
        string output;
        output.reserve(input.size());

        size_t i = 0;
        while (i < input.size()) {
            bool matched = false;
            if (input[i] == '&') {
                for (const EntityPattern& pattern : patterns) {
                    // compare() clamps the range to the end of the string,
                    // so a pattern that would run past the end never matches.
                    if (input.compare(i, pattern.entity.size(),
                                      pattern.entity) == 0) {
                        output += pattern.replacement;
                        i += pattern.entity.size();
                        matched = true;
                        break;
                    }
                }
            }
            if (!matched) {
                // Ordinary character, or an '&' that starts no known entity.
                output += input[i];
                ++i;
            }
        }

        // Return the parsed string
        return output;
    }
};
// Driver Code
// Demonstrates the pattern-matching parser on a string with two entities.
int main()
{
    // The entities are spelled out in full; the parser turns them into
    // '>' and '<'.
    string input = "17 &gt; 25 and 25 &lt; 17";
    // This version of GfG has no table to set up, so there is no
    // initializeMap() call here.
    GfG g;
    // Prints: 17 > 25 and 25 < 17
    cout << g.parseInputString(input);
    return 0;
}
C++
// C++ program for the above approach
#include <iostream>
#include <regex>
#include <string>
#include <unordered_map>
using namespace std;

// Given Expression with mapped value: each key is an HTML entity (used
// verbatim as a regular expression; none of them contains a regex
// metacharacter) and each value is the text that replaces it.
const unordered_map<string, string> m = {
    { "&quot;", "\"" },
    { "&apos;", "'" },
    { "&amp;", "&" },
    { "&gt;", ">" },
    { "&lt;", "<" },
    { "&frasl;", "/" },
    { "&nbsp;", " " },
    { "&reg;", "®" },
    { "&copy;", "©" }
};

// Function that converts the given HTML Entity to its parsed String.
// Runs one regex_replace() pass over `input` per entry of `m` and returns
// the result.
string
parseInputString(string input)
{
    // "&amp;" must be handled last: unordered_map iteration order is
    // unspecified, and decoding it early would turn "&amp;gt;" into
    // "&gt;" and then, wrongly, into ">".
    const string ampersandEntity = "&amp;";

    for (const auto& it : m) {
        if (it.first == ampersandEntity) {
            continue;
        }
        // Create ReGex Expression
        const regex e(it.first);
        // Replace every match with the mapped value
        input = regex_replace(input, e, it.second);
    }

    const auto amp = m.find(ampersandEntity);
    if (amp != m.end()) {
        input = regex_replace(input, regex(amp->first), amp->second);
    }

    // Return the parsed string
    return input;
}
// Driver Code
// Demonstrates the regex-based parser on a string with two entities.
int main()
{
    // The entities are spelled out in full; the parser turns them into
    // '>' and '<'.
    string input
        = "17 &gt; 25 and 25 &lt; 17";
    // Prints: 17 > 25 and 25 < 17
    cout << parseInputString(input);
    return 0;
}
17 > 25 and 25 < 17
时间复杂度: O(N)
辅助空间: O(N)
方法2 –使用模式匹配:
步骤如下:
- 遍历给定的字符串str 。
- 在遍历时,如果遇到任何字符“&” ,则查找在此“与”号之后存在哪个HTML实体。
- 根据上表找到与该实体匹配的字符,并将其追加到输出字符串中。
- 遍历上面的字符串后,将输出的字符串作为结果打印。
下面是上述方法的实现:
C++
// C++ program to Parse the HTML Entities
#include <iostream>
#include <string>
using namespace std;

// Decodes named HTML entities by matching the text that follows each '&'
// against a fixed list of patterns.
class GfG {
public:
    // Returns a copy of `input` in which every supported entity has been
    // replaced by its special character. Anything else, including a lone
    // '&' or an unknown "&name;", is copied unchanged. The input is scanned
    // once, left to right, so "&amp;gt;" yields "&gt;" rather than ">".
    string parseInputString(const string& input)
    {
        struct EntityPattern {
            string entity;       // text to look for, e.g. "&gt;"
            string replacement;  // text written in its place
        };
        static const EntityPattern patterns[] = {
            { "&quot;", "\"" },
            { "&apos;", "'" },
            { "&amp;", "&" },
            { "&gt;", ">" },
            { "&lt;", "<" },
            { "&frasl;", "/" },
            { "&nbsp;", " " },
            { "&reg;", "®" },
            { "&copy;", "©" },
        };

        // To store parsed string
        string output;
        output.reserve(input.size());

        size_t i = 0;
        while (i < input.size()) {
            bool matched = false;
            if (input[i] == '&') {
                for (const EntityPattern& pattern : patterns) {
                    // compare() clamps the range to the end of the string,
                    // so a pattern that would run past the end never matches.
                    if (input.compare(i, pattern.entity.size(),
                                      pattern.entity) == 0) {
                        output += pattern.replacement;
                        i += pattern.entity.size();
                        matched = true;
                        break;
                    }
                }
            }
            if (!matched) {
                // Ordinary character, or an '&' that starts no known entity.
                output += input[i];
                ++i;
            }
        }

        // Return the parsed string
        return output;
    }
};
// Driver Code
// Demonstrates the pattern-matching parser on a string with two entities.
int main()
{
    // The entities are spelled out in full; the parser turns them into
    // '>' and '<'.
    string input = "17 &gt; 25 and 25 &lt; 17";
    // This version of GfG has no table to set up, so there is no
    // initializeMap() call here.
    GfG g;
    // Prints: 17 > 25 and 25 < 17
    cout << g.parseInputString(input);
    return 0;
}
17 > 25 and 25 < 17
时间复杂度: O(N)
辅助空间: O(N)
方法3 –使用正则表达式:
步骤如下:
- 将所有表达式及其映射值存储在Map M中。
- 对于映射中的每个键,使用以下命令创建一个正则表达式:
regex e(key);
- 现在,用在Map M中的映射值替换上面形成的上述正则表达式为:
regex_replace(str, e, value);
where,
str is the input string,
e is the expression formed in the above step, and
value is the value mapped with expression e in the Map
- 对Map中的每个表达式重复上述步骤,直到所有表达式都已被替换。
下面是上述方法的实现:
C++
// C++ program for the above approach
#include <iostream>
#include <regex>
#include <string>
#include <unordered_map>
using namespace std;

// Given Expression with mapped value: each key is an HTML entity (used
// verbatim as a regular expression; none of them contains a regex
// metacharacter) and each value is the text that replaces it.
const unordered_map<string, string> m = {
    { "&quot;", "\"" },
    { "&apos;", "'" },
    { "&amp;", "&" },
    { "&gt;", ">" },
    { "&lt;", "<" },
    { "&frasl;", "/" },
    { "&nbsp;", " " },
    { "&reg;", "®" },
    { "&copy;", "©" }
};

// Function that converts the given HTML Entity to its parsed String.
// Runs one regex_replace() pass over `input` per entry of `m` and returns
// the result.
string
parseInputString(string input)
{
    // "&amp;" must be handled last: unordered_map iteration order is
    // unspecified, and decoding it early would turn "&amp;gt;" into
    // "&gt;" and then, wrongly, into ">".
    const string ampersandEntity = "&amp;";

    for (const auto& it : m) {
        if (it.first == ampersandEntity) {
            continue;
        }
        // Create ReGex Expression
        const regex e(it.first);
        // Replace every match with the mapped value
        input = regex_replace(input, e, it.second);
    }

    const auto amp = m.find(ampersandEntity);
    if (amp != m.end()) {
        input = regex_replace(input, regex(amp->first), amp->second);
    }

    // Return the parsed string
    return input;
}
// Driver Code
// Demonstrates the regex-based parser on a string with two entities.
int main()
{
    // The entities are spelled out in full; the parser turns them into
    // '>' and '<'.
    string input
        = "17 &gt; 25 and 25 &lt; 17";
    // Prints: 17 > 25 and 25 < 17
    cout << parseInputString(input);
    return 0;
}
17 > 25 and 25 < 17
时间复杂度: O(N)
辅助空间: O(N)