HTML 实体解析器 - 芒果文档

给定一个字符串str ，其中包含各种 HTML 实体，任务是将这些实体替换为其相应的特殊字符。

An HTML entity parser takes HTML code as input and replaces every special-character entity with the character itself. For example, for the quotation mark the entity is &quot; and the corresponding character is ".

编程需要懂一点英语

下面是 HTML 实体及其相应的特殊字符如下表所示：

Name/ Description	HTML Entity	Special Character
Space	&nbsp;	(space)
Ampersand	&amp;	&
Greater than	&gt;	>
Less than	&lt;	<
Single Quotation Mark	&apos;	'
Double Quotation Mark	&quot;	"
Trademark	&reg;	®
Copyright mark	&copy;	©
Forward Slash	&frasl;	⁄

例子：

Input: str = "17 &gt; 25 and 25 &lt; 17"
Output: 17 > 25 and 25 < 17
Explanation: In the above example, &gt; is
replaced by the corresponding special character
> and &lt; is replaced by <.

Input: str = "&copy; is symbol of copyright"
Output: © is symbol of copyright
Explanation: In the above example, &copy; is
replaced by the corresponding special character
©.

编程需要懂一点英语

方法一——使用unordered_map：步骤如下：

将 HTML 实体及其对应的字符存储在 Map 中。
遍历给定的字符串，如果遇到字符“&”，则查找该 & 符号之后出现的是哪个 HTML 实体。
在输出字符串中添加与该实体对应的字符。
打印输出字符串作为结果。

下面是上述方法的实现：

C++

// C++ program for the above approach
#include <iostream>
#include <string>
#include <unordered_map>
using namespace std;
  
// Decodes a fixed set of named HTML entities using a hash-map lookup.
class GfG {
public:
    // Maps each supported entity (for example "&gt;") to its replacement
    // text. Filled in by initializeMap().
    std::unordered_map<std::string, std::string> m;

public:
    // Associates every supported HTML entity with its special character.
    // Call this before parseInputString(); with an empty map nothing is
    // decoded and the input is returned unchanged.
    void initializeMap()
    {
        m["&quot;"] = "\"";
        m["&apos;"] = "'";
        m["&amp;"] = "&";
        m["&gt;"] = ">";
        m["&lt;"] = "<";
        m["&frasl;"] = "/";
        m["&nbsp;"] = " ";
        m["&reg;"] = "®";
        m["&copy;"] = "©";
    }

public:
    // Returns a copy of `input` in which every entity present in `m` is
    // replaced by its mapped text. Anything that is not a known entity
    // (a bare '&', an unknown "&name;", an unterminated "&gt") is copied
    // through unchanged. The result is never re-scanned, so "&amp;gt;"
    // decodes to "&gt;" and not to ">".
    std::string parseInputString(std::string input)
    {
        std::string output;
        output.reserve(input.size());

        std::size_t i = 0;
        while (i < input.size()) {
            if (input[i] == '&') {
                // A candidate entity ends at the next ';'. If another '&'
                // shows up first, this '&' is just literal text; stopping
                // there keeps a stray '&' from swallowing later entities.
                const std::size_t stop = input.find_first_of("&;", i + 1);
                if (stop != std::string::npos && input[stop] == ';') {
                    const auto it = m.find(input.substr(i, stop - i + 1));
                    if (it != m.end()) {
                        output += it->second;
                        i = stop + 1;
                        continue;
                    }
                }
            }

            // Ordinary character, or an '&' that starts no known entity.
            output += input[i];
            ++i;
        }

        return output;
    }
};
  
// Driver Code
int main()
{
    // Given String
    string input = "17 > 25 and 25 < 17";
    GfG g;
  
    // Initialised parsed string
    g.initializeMap();
  
    // Function Call
    cout << g.parseInputString(input);
    return 0;
}

C++

// C++ program to Parse the HTML Entities
#include <iostream>
#include <string>
using namespace std;
  
// Decodes a fixed set of named HTML entities by matching the text at each
// '&' directly against every known entity (no map lookup).
class GfG {

public:
    // Returns a copy of `input` in which every supported entity is replaced
    // by its special character. Anything that is not a supported entity
    // (a bare '&', an unknown "&name;", an unterminated "&gt") is copied
    // through unchanged. The result is never re-scanned, so "&amp;gt;"
    // decodes to "&gt;" and not to ">".
    std::string parseInputString(std::string input)
    {
        // One row per supported entity: the pattern to match and the text
        // that replaces it.
        struct Entity {
            std::string name;
            std::string replacement;
        };
        static const Entity kEntities[] = {
            { "&quot;", "\"" },
            { "&apos;", "'" },
            { "&amp;", "&" },
            { "&gt;", ">" },
            { "&lt;", "<" },
            { "&frasl;", "/" },
            { "&nbsp;", " " },
            { "&reg;", "®" },
            { "&copy;", "©" },
        };

        // To store parsed string
        std::string output;
        output.reserve(input.size());

        std::size_t i = 0;
        while (i < input.size()) {
            bool matched = false;

            if (input[i] == '&') {
                // Compare the text starting at this '&' with each pattern.
                // compare() clips at the end of the string, so a truncated
                // entity near the end simply fails to match.
                for (const Entity& entity : kEntities) {
                    if (input.compare(i, entity.name.size(), entity.name) == 0) {
                        output += entity.replacement;
                        i += entity.name.size();
                        matched = true;
                        break;
                    }
                }
            }

            if (!matched) {
                // Ordinary character, or an '&' that starts no known entity.
                output += input[i];
                ++i;
            }
        }

        // Return the parsed string
        return output;
    }
};
  
// Driver Code
int main()
{
    // Given String
    string input = "17 > 25 and 25 < 17";
    GfG g;
  
    // Initialised parsed string
    g.initializeMap();
  
    // Function Call
    cout << g.parseInputString(input);
    return 0;
}

C++

// C++ program for the above approach
#include <iostream>
#include <regex>
#include <string>
#include <unordered_map>
using namespace std;
  
// Given Expression with mapped value
// Supported HTML entities and the text each one decodes to.
// Initialised in the declaration: a const object cannot be assigned
// afterwards, and assignment statements are not allowed at namespace scope.
const std::unordered_map<std::string, std::string> m = {
    { "&quot;", "\"" },
    { "&apos;", "'" },
    { "&amp;", "&" },
    { "&gt;", ">" },
    { "&lt;", "<" },
    { "&frasl;", "/" },
    { "&nbsp;", " " },
    { "&reg;", "®" },
    { "&copy;", "©" }
};

// Returns a copy of `input` in which every entity listed in `m` is replaced
// by its mapped text.
//
// The input is scanned once with a regular expression that finds
// entity-shaped tokens ("&name;"), and each token is looked up in `m`.
// Tokens not in `m` are copied through unchanged. A single pass, rather
// than one regex_replace per map entry, keeps the result independent of the
// map's iteration order and avoids decoding already-decoded text:
// "&amp;gt;" becomes "&gt;", never ">".
std::string
parseInputString(std::string input)
{
    // '&', one or more letters, ';'
    static const std::regex entityPattern("&[A-Za-z]+;");

    std::string output;
    output.reserve(input.size());

    // Start of the text not yet copied to `output`.
    std::string::const_iterator copiedUpTo = input.cbegin();

    const std::sregex_iterator noMoreMatches;
    for (std::sregex_iterator it(input.cbegin(), input.cend(), entityPattern);
         it != noMoreMatches; ++it) {
        const std::smatch& match = *it;

        // Copy the literal text between the previous token and this one.
        output.append(copiedUpTo, match[0].first);

        const std::string token = match.str();
        const auto found = m.find(token);
        if (found != m.end()) {
            output += found->second;
        } else {
            // Looks like an entity but is not supported; keep it as is.
            output += token;
        }

        copiedUpTo = match[0].second;
    }

    // Copy whatever follows the last token.
    output.append(copiedUpTo, input.cend());

    // Return the parsed string
    return output;
}
  
// Driver Code
int main()
{
    // Given String
    string input
        = "17 > 25 and 25 < 17";
  
    // Function Call
    cout << parseInputString(input);
    return 0;
}

输出：

17 > 25 and 25 < 17

时间复杂度： O(N)
辅助空间： O(N)

方法 2 – 使用模式匹配：
以下是步骤：

遍历给定的字符串str 。
遍历时，如果遇到任何字符“&” ，则查找此＆符号后存在哪个 HTML 实体。
根据上表匹配到的实体，在输出字符串中添加与该实体对应的字符。
遍历上述字符串后，打印输出字符串作为结果。

下面是上述方法的实现：

C++

// C++ program to Parse the HTML Entities
#include <iostream>
#include <string>
using namespace std;
  
// Decodes a fixed set of named HTML entities by matching the text at each
// '&' directly against every known entity (no map lookup).
class GfG {

public:
    // Returns a copy of `input` in which every supported entity is replaced
    // by its special character. Anything that is not a supported entity
    // (a bare '&', an unknown "&name;", an unterminated "&gt") is copied
    // through unchanged. The result is never re-scanned, so "&amp;gt;"
    // decodes to "&gt;" and not to ">".
    std::string parseInputString(std::string input)
    {
        // One row per supported entity: the pattern to match and the text
        // that replaces it.
        struct Entity {
            std::string name;
            std::string replacement;
        };
        static const Entity kEntities[] = {
            { "&quot;", "\"" },
            { "&apos;", "'" },
            { "&amp;", "&" },
            { "&gt;", ">" },
            { "&lt;", "<" },
            { "&frasl;", "/" },
            { "&nbsp;", " " },
            { "&reg;", "®" },
            { "&copy;", "©" },
        };

        // To store parsed string
        std::string output;
        output.reserve(input.size());

        std::size_t i = 0;
        while (i < input.size()) {
            bool matched = false;

            if (input[i] == '&') {
                // Compare the text starting at this '&' with each pattern.
                // compare() clips at the end of the string, so a truncated
                // entity near the end simply fails to match.
                for (const Entity& entity : kEntities) {
                    if (input.compare(i, entity.name.size(), entity.name) == 0) {
                        output += entity.replacement;
                        i += entity.name.size();
                        matched = true;
                        break;
                    }
                }
            }

            if (!matched) {
                // Ordinary character, or an '&' that starts no known entity.
                output += input[i];
                ++i;
            }
        }

        // Return the parsed string
        return output;
    }
};
  
// Driver Code
int main()
{
    // Given String
    string input = "17 > 25 and 25 < 17";
    GfG g;
  
    // Initialised parsed string
    g.initializeMap();
  
    // Function Call
    cout << g.parseInputString(input);
    return 0;
}

输出：

17 > 25 and 25 < 17

时间复杂度： O(N)
辅助空间： O(N)

方法 3 – 使用正则表达式：
以下是步骤：

将所有表达式及其映射值存储在 Map M 中。
对于 Map 中的每个键，使用以下方法创建正则表达式：

regex e(key);

编程需要懂一点英语
现在将字符串中与上面形成的正则表达式匹配的部分，替换为它在 Map M 中的映射值：

regex_replace(str, e, value);
where,
str is the input string,
e is the expression formed in the above step, and
value is the value mapped to expression e in the Map

编程需要懂一点英语
对 Map 中的每个表达式重复以上步骤，直到所有表达式都已被替换。

下面是上述方法的实现：

C++

// C++ program for the above approach
#include <iostream>
#include <regex>
#include <string>
#include <unordered_map>
using namespace std;
  
// Given Expression with mapped value
// Supported HTML entities and the text each one decodes to.
// Initialised in the declaration: a const object cannot be assigned
// afterwards, and assignment statements are not allowed at namespace scope.
const std::unordered_map<std::string, std::string> m = {
    { "&quot;", "\"" },
    { "&apos;", "'" },
    { "&amp;", "&" },
    { "&gt;", ">" },
    { "&lt;", "<" },
    { "&frasl;", "/" },
    { "&nbsp;", " " },
    { "&reg;", "®" },
    { "&copy;", "©" }
};

// Returns a copy of `input` in which every entity listed in `m` is replaced
// by its mapped text.
//
// The input is scanned once with a regular expression that finds
// entity-shaped tokens ("&name;"), and each token is looked up in `m`.
// Tokens not in `m` are copied through unchanged. A single pass, rather
// than one regex_replace per map entry, keeps the result independent of the
// map's iteration order and avoids decoding already-decoded text:
// "&amp;gt;" becomes "&gt;", never ">".
std::string
parseInputString(std::string input)
{
    // '&', one or more letters, ';'
    static const std::regex entityPattern("&[A-Za-z]+;");

    std::string output;
    output.reserve(input.size());

    // Start of the text not yet copied to `output`.
    std::string::const_iterator copiedUpTo = input.cbegin();

    const std::sregex_iterator noMoreMatches;
    for (std::sregex_iterator it(input.cbegin(), input.cend(), entityPattern);
         it != noMoreMatches; ++it) {
        const std::smatch& match = *it;

        // Copy the literal text between the previous token and this one.
        output.append(copiedUpTo, match[0].first);

        const std::string token = match.str();
        const auto found = m.find(token);
        if (found != m.end()) {
            output += found->second;
        } else {
            // Looks like an entity but is not supported; keep it as is.
            output += token;
        }

        copiedUpTo = match[0].second;
    }

    // Copy whatever follows the last token.
    output.append(copiedUpTo, input.cend());

    // Return the parsed string
    return output;
}
  
// Driver Code
int main()
{
    // Given String
    string input
        = "17 > 25 and 25 < 17";
  
    // Function Call
    cout << parseInputString(input);
    return 0;
}

输出：

17 > 25 and 25 < 17

时间复杂度： O(N)
辅助空间： O(N)