给定一个字符串str ,其中包含各种 HTML 实体,任务是将这些实体替换为其相应的特殊字符。
An HTML entity parser takes HTML code as input and replaces every entity of a special character with the character itself. For example, for the quotation mark the entity is `&quot;` and the symbol character is `"`.
下面是 HTML 实体及其相应的特殊字符如下表所示:
Name / Description | HTML Entity | Special Character |
---|---|---|
Space | `&nbsp;` | (space) |
Ampersand | `&amp;` | & |
Greater than | `&gt;` | > |
Less than | `&lt;` | < |
Single Quotation Mark | `&apos;` | ' |
Double Quotation Mark | `&quot;` | " |
Registered Trademark | `&reg;` | ® |
Copyright mark | `&copy;` | © |
Forward Slash | `&frasl;` | ⁄ |
例子:
Input: str = "17 &gt; 25 and 25 &lt; 17"
Output: 17 > 25 and 25 < 17
Explanation: In the above example &gt; is
replaced by its corresponding special character
> and &lt; is replaced by <.
Input: str = "&copy; is symbol of copyright"
Output: © is symbol of copyright
Explanation: In the above example &copy; is
replaced by its corresponding special character
©.
方法一——使用unordered_map:步骤如下:
- 将 HTML 实体及其对应的字符存储在 Map 中。
- 遍历给定的字符串,如果遇到任何字符“&” ,则查找此与符号后存在哪个 HTML 实体。
- 在输出字符串中添加与该实体对应的字符。
- 打印输出字符串作为结果。
下面是上述方法的实现:
C++
// C++ program for the above approach
#include <iostream>
#include <regex>
#include <string>
#include <unordered_map>
using namespace std;
// Decodes a fixed set of named HTML entities using a lookup table.
//
// Usage: call initializeMap() once to populate the table, then call
// parseInputString() for every string that should be decoded.
class GfG {
public:
    // Entity text (for example "&gt;") -> replacement text (for example ">").
    std::unordered_map<std::string, std::string> m;

public:
    // Associating html entity with special character.
    // Until this has been called the table is empty, so
    // parseInputString() returns its input unchanged.
    void initializeMap()
    {
        m["&quot;"] = "\"";
        m["&apos;"] = "'";
        m["&amp;"] = "&";
        m["&gt;"] = ">";
        m["&lt;"] = "<";
        m["&frasl;"] = "/";
        m["&nbsp;"] = " ";
        m["&reg;"] = "®";
        m["&copy;"] = "©";
    }

public:
    // Function that converts the given HTML entities to their characters.
    //
    // input:   text that may contain entities stored in `m`.
    // returns: a copy of `input` in which every entity found in `m` has
    //          been replaced; any other text, including a lone '&' or an
    //          unknown "&name;", is copied through unchanged.
    std::string parseInputString(std::string input)
    {
        // No candidate longer than the longest key can match, so the
        // search after each '&' is limited to that many characters. This
        // keeps a stray '&' from swallowing entities that follow it.
        std::size_t longestEntity = 0;
        for (const auto& entry : m) {
            if (entry.first.size() > longestEntity) {
                longestEntity = entry.first.size();
            }
        }

        std::string output;
        output.reserve(input.size());

        std::size_t pos = 0;
        while (pos < input.size()) {
            if (input[pos] == '&') {
                std::size_t windowEnd = pos + longestEntity;
                if (windowEnd > input.size()) {
                    windowEnd = input.size();
                }

                // Locate the first ';' inside the window, if there is one.
                std::size_t semicolon = pos + 1;
                while (semicolon < windowEnd && input[semicolon] != ';') {
                    ++semicolon;
                }

                if (semicolon < windowEnd) {
                    const auto found
                        = m.find(input.substr(pos, semicolon - pos + 1));
                    if (found != m.end()) {
                        output += found->second;
                        pos = semicolon + 1;
                        continue;
                    }
                }
            }

            // Not the start of a known entity: copy the character as-is.
            output += input[pos];
            ++pos;
        }

        // Return the parsed string
        return output;
    }
};
// Driver Code
// Driver code: decodes one sample string and prints the result.
int main()
{
    // Sample input containing the &gt; and &lt; entities
    std::string input = "17 &gt; 25 and 25 &lt; 17";
    GfG g;
    // The entity table must be populated before parsing
    g.initializeMap();
    // Prints: 17 > 25 and 25 < 17
    std::cout << g.parseInputString(input);
    return 0;
}
C++
// C++ program to Parse the HTML Entities
#include
using namespace std;
// Decodes a fixed set of named HTML entities by direct pattern matching.
class GfG {
public:
    // Function that converts the given HTML entities to their characters.
    //
    // input:   text that may contain the entities listed in kPatterns.
    // returns: a copy of `input` with every listed entity replaced; any
    //          other text, including a lone '&' or an unknown "&name;",
    //          is copied through unchanged.
    std::string parseInputString(std::string input)
    {
        struct Pattern {
            std::string entity;
            std::string replacement;
        };
        // One row per supported entity; replaces nine near-identical
        // if/else branches.
        static const Pattern kPatterns[] = {
            { "&quot;", "\"" },
            { "&apos;", "'" },
            { "&amp;", "&" },
            { "&gt;", ">" },
            { "&lt;", "<" },
            { "&frasl;", "/" },
            { "&nbsp;", " " },
            { "&reg;", "®" },
            { "&copy;", "©" },
        };

        // To store parsed string
        std::string output;
        output.reserve(input.size());

        std::size_t pos = 0;
        while (pos < input.size()) {
            const Pattern* matched = nullptr;
            if (input[pos] == '&') {
                // Compare each entity against the text starting at this
                // '&'. Matching is anchored here, so an '&' that starts
                // no entity cannot hide the entities that follow it.
                for (const Pattern& candidate : kPatterns) {
                    if (input.compare(pos, candidate.entity.size(),
                                      candidate.entity)
                        == 0) {
                        matched = &candidate;
                        break;
                    }
                }
            }

            if (matched != nullptr) {
                output += matched->replacement;
                pos += matched->entity.size();
            }
            else {
                output += input[pos];
                ++pos;
            }
        }

        // Return the parsed string
        return output;
    }
};
// Driver Code
// Driver code: decodes one sample string and prints the result.
int main()
{
    // Sample input containing the &gt; and &lt; entities
    std::string input = "17 &gt; 25 and 25 &lt; 17";
    // This GfG matches entities directly and has no table to initialise,
    // so no setup call is made before parsing.
    GfG g;
    // Prints: 17 > 25 and 25 < 17
    std::cout << g.parseInputString(input);
    return 0;
}
C++
// C++ program for the above approach
#include
#include
#include
using namespace std;
// Given Expression with mapped value
// Lookup table: HTML entity text -> replacement text.
// Each key is later compiled as a regular expression, so keys must not
// contain regex metacharacters.
const std::unordered_map<std::string, std::string> m = {
    { "&quot;", "\"" },
    { "&apos;", "'" },
    { "&amp;", "&" },
    { "&gt;", ">" },
    { "&lt;", "<" },
    { "&frasl;", "/" },
    { "&nbsp;", " " },
    { "&reg;", "®" },
    { "&copy;", "©" }
};

// Function that converts the given HTML entities to their characters.
//
// input:   text that may contain the entities stored in `m`.
// returns: `input` with every entity from `m` replaced by its mapped
//          value; all other text is left unchanged.
std::string
parseInputString(std::string input)
{
    static const std::string kAmpersandEntity = "&amp;";

    // The iteration order of an unordered_map is unspecified. "&amp;" is
    // therefore skipped here and handled last: decoding it first would
    // turn "&amp;gt;" into "&gt;" and then, wrongly, into ">".
    for (const auto& entry : m) {
        if (entry.first == kAmpersandEntity) {
            continue;
        }
        // Create the regex for this entity and replace every match
        // with the mapped value.
        input = std::regex_replace(input, std::regex(entry.first),
                                   entry.second);
    }

    const auto ampersand = m.find(kAmpersandEntity);
    if (ampersand != m.end()) {
        input = std::regex_replace(input, std::regex(ampersand->first),
                                   ampersand->second);
    }

    // Return the parsed string
    return input;
}
// Driver Code
// Driver code: decodes one sample string and prints the result.
int main()
{
    // Sample input containing the &gt; and &lt; entities
    std::string input
        = "17 &gt; 25 and 25 &lt; 17";
    // Prints: 17 > 25 and 25 < 17
    std::cout << parseInputString(input);
    return 0;
}
17 > 25 and 25 < 17
时间复杂度: O(N)
辅助空间: O(N)
方法 2 – 使用模式匹配:
以下是步骤:
- 遍历给定的字符串str 。
- 遍历时,如果遇到任何字符“&” ,则查找此&符号后存在哪个 HTML 实体。
- 根据上表匹配到的实体,在输出字符串中添加与该实体对应的字符。
- 遍历上述字符串后,打印输出字符串作为结果。
下面是上述方法的实现:
C++
// C++ program to Parse the HTML Entities
#include
using namespace std;
// Decodes a fixed set of named HTML entities by direct pattern matching.
class GfG {
public:
    // Function that converts the given HTML entities to their characters.
    //
    // input:   text that may contain the entities listed in kPatterns.
    // returns: a copy of `input` with every listed entity replaced; any
    //          other text, including a lone '&' or an unknown "&name;",
    //          is copied through unchanged.
    std::string parseInputString(std::string input)
    {
        struct Pattern {
            std::string entity;
            std::string replacement;
        };
        // One row per supported entity; replaces nine near-identical
        // if/else branches.
        static const Pattern kPatterns[] = {
            { "&quot;", "\"" },
            { "&apos;", "'" },
            { "&amp;", "&" },
            { "&gt;", ">" },
            { "&lt;", "<" },
            { "&frasl;", "/" },
            { "&nbsp;", " " },
            { "&reg;", "®" },
            { "&copy;", "©" },
        };

        // To store parsed string
        std::string output;
        output.reserve(input.size());

        std::size_t pos = 0;
        while (pos < input.size()) {
            const Pattern* matched = nullptr;
            if (input[pos] == '&') {
                // Compare each entity against the text starting at this
                // '&'. Matching is anchored here, so an '&' that starts
                // no entity cannot hide the entities that follow it.
                for (const Pattern& candidate : kPatterns) {
                    if (input.compare(pos, candidate.entity.size(),
                                      candidate.entity)
                        == 0) {
                        matched = &candidate;
                        break;
                    }
                }
            }

            if (matched != nullptr) {
                output += matched->replacement;
                pos += matched->entity.size();
            }
            else {
                output += input[pos];
                ++pos;
            }
        }

        // Return the parsed string
        return output;
    }
};
// Driver Code
// Driver code: decodes one sample string and prints the result.
int main()
{
    // Sample input containing the &gt; and &lt; entities
    std::string input = "17 &gt; 25 and 25 &lt; 17";
    // This GfG matches entities directly and has no table to initialise,
    // so no setup call is made before parsing.
    GfG g;
    // Prints: 17 > 25 and 25 < 17
    std::cout << g.parseInputString(input);
    return 0;
}
17 > 25 and 25 < 17
时间复杂度: O(N)
辅助空间: O(N)
方法 3 – 使用正则表达式:
以下是步骤:
- 将所有表达式及其映射值存储在 Map M 中。
- 对于 Map 中的每个键,使用以下方法创建正则表达式:
regex e(key);
- 现在使用以下方法,将字符串中与上面形成的正则表达式匹配的内容替换为它在 Map M 中的映射值:
regex_replace(str, e, value);
where,
str is the input string,
e is the expression formed in the above step, and
value is the value mapped with expression e in the Map
- 对 Map 中的每个表达式重复以上步骤,直到所有表达式都已被替换。
下面是上述方法的实现:
C++
// C++ program for the above approach
#include
#include
#include
using namespace std;
// Given Expression with mapped value
// Lookup table: HTML entity text -> replacement text.
// Each key is later compiled as a regular expression, so keys must not
// contain regex metacharacters.
const std::unordered_map<std::string, std::string> m = {
    { "&quot;", "\"" },
    { "&apos;", "'" },
    { "&amp;", "&" },
    { "&gt;", ">" },
    { "&lt;", "<" },
    { "&frasl;", "/" },
    { "&nbsp;", " " },
    { "&reg;", "®" },
    { "&copy;", "©" }
};

// Function that converts the given HTML entities to their characters.
//
// input:   text that may contain the entities stored in `m`.
// returns: `input` with every entity from `m` replaced by its mapped
//          value; all other text is left unchanged.
std::string
parseInputString(std::string input)
{
    static const std::string kAmpersandEntity = "&amp;";

    // The iteration order of an unordered_map is unspecified. "&amp;" is
    // therefore skipped here and handled last: decoding it first would
    // turn "&amp;gt;" into "&gt;" and then, wrongly, into ">".
    for (const auto& entry : m) {
        if (entry.first == kAmpersandEntity) {
            continue;
        }
        // Create the regex for this entity and replace every match
        // with the mapped value.
        input = std::regex_replace(input, std::regex(entry.first),
                                   entry.second);
    }

    const auto ampersand = m.find(kAmpersandEntity);
    if (ampersand != m.end()) {
        input = std::regex_replace(input, std::regex(ampersand->first),
                                   ampersand->second);
    }

    // Return the parsed string
    return input;
}
// Driver Code
// Driver code: decodes one sample string and prints the result.
int main()
{
    // Sample input containing the &gt; and &lt; entities
    std::string input
        = "17 &gt; 25 and 25 &lt; 17";
    // Prints: 17 > 25 and 25 < 17
    std::cout << parseInputString(input);
    return 0;
}
17 > 25 and 25 < 17
时间复杂度: O(N)
辅助空间: O(N)