📜  在C / C++中借助Pragma加快代码执行速度

📅  最后修改于: 2021-05-30 08:58:44             🧑  作者: Mango

在不指定任何优化选项时,编译器的主要目标是降低编译成本,并使调试产生预期的结果。并非所有优化都直接由某个标志控制,有时需要显式声明才能启用相应的优化。默认情况下,编译器不进行优化。要启用这些默认被关闭的优化,我们可以使用编译指示(pragma)。

未优化程序的示例:让我们考虑一个示例,计算10000000以内的所有素数。

下面是没有优化的代码:

// C++ program to calculate the Prime
// Numbers upto 10000000 using Sieve
// of Eratosthenes with NO optimization
  
#include <cmath>
#include <ctime>
#include <iostream>
#include <vector>
#define N 10000005
using namespace std;
  
// Sieve table: prime[k] stays true while k is still considered prime.
// It holds N entries, so the valid indices are 0 .. N - 1.
vector<bool> prime(N, true);
  
// Sieve implemented to find Prime
// Number
void sieveOfEratosthenes()
{
    for (int i = 2; i <= sqrt(N); ++i) {
        if (prime[i]) {
            for (int j = i * i; j <= N; j += i) {
                prime[j] = false;
            }
        }
    }
}
  
// Driver Code
int main()
{
    // Intialise clock to calculate
    // time required to execute without
    // optimization
    clock_t start, end;
  
    // Start clock
    start = clock();
  
    // Function call to find Prime Numbers
    sieveOfEratosthenes();
  
    // End clock
    end = clock();
  
    // Calculate the time difference
    double time_taken
        = double(end - start)
          / double(CLOCKS_PER_SEC);
  
    // Print the Calculated execution time
    cout << "Execution time: " << time_taken
         << " secs";
  
    return 0;
}
输出:
Execution time: 0.592183 secs

以下是优化:

  1. O1:使用O1进行优化编译会花费更多的时间,对于较大的函数还会占用更多的内存。编译器会尝试减小代码体积并缩短执行时间。在O1级别,很少有优化能够带来显著的效果,但O1是尝试更高级别优化的基础。

    以下是使用O1优化的先前程序的实现:

    // C++ program to calculate the Prime
    // Numbers upto 10000000 using Sieve
    // of Eratosthenes with O1 optimization
      
    // To see the working of controlled
    // optimization "O1"
    #pragma GCC optimize("O1")
      
    #include <cmath>
    #include <ctime>
    #include <iostream>
    #include <vector>
    #define N 10000005
    using namespace std;
      
    // Sieve table: prime[k] stays true while k is still considered prime.
    // It holds N entries, so the valid indices are 0 .. N - 1.
    vector<bool> prime(N, true);
      
    // Sieve implemented to find Prime
    // Number
    void sieveOfEratosthenes()
    {
        for (int i = 2; i <= sqrt(N); ++i) {
            if (prime[i]) {
                for (int j = i * i; j <= N; j += i) {
                    prime[j] = false;
                }
            }
        }
    }
      
    // Driver Code
    int main()
    {
        // Intialise clock to calculate
        // time required to execute without
        // optimization
        clock_t start, end;
      
        // Start clock
        start = clock();
      
        // Function call to find Prime Numbers
        sieveOfEratosthenes();
      
        // End clock
        end = clock();
      
        // Calculate the time difference
        double time_taken
            = double(end - start)
              / double(CLOCKS_PER_SEC);
      
        // Print the Calculated execution time
        cout << "Execution time: " << time_taken
             << " secs.";
      
        return 0;
    }
    
    输出:
    Execution time: 0.384945 secs.
    
  2. O2:O2会进行更大程度的优化。与O1相比,此选项会增加编译时间,同时提升所生成代码的性能。O2会开启O1指定的所有优化标志。

    以下是使用O2优化的先前程序的实现:

    // C++ program to calculate the Prime
    // Numbers upto 10000000 using Sieve
    // of Eratosthenes with O2 optimization
      
    // To see the working of controlled
    // optimization "O2"
    #pragma GCC optimize("O2")
      
    #include <cmath>
    #include <ctime>
    #include <iostream>
    #include <vector>
    #define N 10000005
    using namespace std;
      
    // Sieve table: prime[k] stays true while k is still considered prime.
    // It holds N entries, so the valid indices are 0 .. N - 1.
    vector<bool> prime(N, true);
      
    // Sieve implemented to find Prime
    // Number
    void sieveOfEratosthenes()
    {
        for (int i = 2; i <= sqrt(N); ++i) {
            if (prime[i]) {
                for (int j = i * i; j <= N; j += i) {
                    prime[j] = false;
                }
            }
        }
    }
      
    // Driver Code
    int main()
    {
        // Intialise clock to calculate
        // time required to execute without
        // optimization
        clock_t start, end;
      
        // Start clock
        start = clock();
      
        // Function call to find Prime Numbers
        sieveOfEratosthenes();
      
        // End clock
        end = clock();
      
        // Calculate the time difference
        double time_taken
            = double(end - start)
              / double(CLOCKS_PER_SEC);
      
        // Print the Calculated execution time
        cout << "Execution time: " << time_taken
             << " secs.";
      
        return 0;
    }
    
    输出:
    Execution time: 0.288337 secs.
    
  3. O3:O3会启用O2级别的所有优化,并额外启用一系列其他标志。O3所包含的标志中,有-floop-interchange、-floop-unroll-jam和-fpeel-loops等。

    以下是使用O3优化的先前程序的实现:

    // C++ program to calculate the Prime
    // Numbers upto 10000000 using Sieve
    // of Eratosthenes with O3 optimization
      
    // To see the working of controlled
    // optimization "O3"
    #pragma GCC optimize("O3")
      
    #include <cmath>
    #include <ctime>
    #include <iostream>
    #include <vector>
    #define N 10000005
    using namespace std;
      
    // Sieve table: prime[k] stays true while k is still considered prime.
    // It holds N entries, so the valid indices are 0 .. N - 1.
    vector<bool> prime(N, true);
      
    // Sieve implemented to find Prime
    // Number
    void sieveOfEratosthenes()
    {
        for (int i = 2; i <= sqrt(N); ++i) {
            if (prime[i]) {
                for (int j = i * i; j <= N; j += i) {
                    prime[j] = false;
                }
            }
        }
    }
      
    // Driver Code
    int main()
    {
        // Intialise clock to calculate
        // time required to execute without
        // optimization
        clock_t start, end;
      
        // Start clock
        start = clock();
      
        // Function call to find Prime Numbers
        sieveOfEratosthenes();
      
        // End clock
        end = clock();
      
        // Calculate the time difference
        double time_taken
            = double(end - start)
              / double(CLOCKS_PER_SEC);
      
        // Print the Calculated execution time
        cout << "Execution time: " << time_taken
             << " secs.";
      
        return 0;
    }
    
    输出:
    Execution time: 0.580154 secs.
    
  4. Os:针对代码大小进行优化。Os启用所有O2优化,但通常会增加代码大小的优化除外。它还会启用-finline-functions,使编译器以代码大小而非执行速度为目标进行调优,并执行旨在减小代码大小的进一步优化。

    以下是使用Os优化的先前程序的实现:

    // C++ program to calculate the Prime
    // Numbers upto 10000000 using Sieve
    // of Eratosthenes with Os optimization
      
    // To see the working of controlled
    // optimization "Os"
    #pragma GCC optimize("Os")
      
    #include <cmath>
    #include <ctime>
    #include <iostream>
    #include <vector>
    #define N 10000005
    using namespace std;
      
    // Sieve table: prime[k] stays true while k is still considered prime.
    // It holds N entries, so the valid indices are 0 .. N - 1.
    vector<bool> prime(N, true);
      
    // Sieve implemented to find Prime
    // Number
    void sieveOfEratosthenes()
    {
        for (int i = 2; i <= sqrt(N); ++i) {
            if (prime[i]) {
                for (int j = i * i; j <= N; j += i) {
                    prime[j] = false;
                }
            }
        }
    }
      
    // Driver Code
    int main()
    {
        // Intialise clock to calculate
        // time required to execute without
        // optimization
        clock_t start, end;
      
        // Start clock
        start = clock();
      
        // Function call to find Prime Numbers
        sieveOfEratosthenes();
      
        // End clock
        end = clock();
      
        // Calculate the time difference
        double time_taken
            = double(end - start)
              / double(CLOCKS_PER_SEC);
      
        // Print the Calculated execution time
        cout << "Execution time: " << time_taken
             << " secs.";
      
        return 0;
    }
    
    输出:
    Execution time: 0.317845 secs.
    
  5. Ofast:Ofast启用所有O3优化,并额外启用若干能够产生更激进优化效果的标志。Ofast结合了以上各个O级别所产生的优化。这种优化通常受到许多竞赛程序员的青睐,因此值得推荐。如果声明了多个优化级别,则最后声明的那个生效。

    以下是使用Ofast优化的先前程序的实现:

    // C++ program to calculate the Prime
    // Numbers upto 10000000 using Sieve
    // of Eratosthenes with Ofast optimization
      
    // To see the working of controlled
    // optimization "Ofast"
    #pragma GCC optimize("Ofast")
      
    #include <cmath>
    #include <ctime>
    #include <iostream>
    #include <vector>
    #define N 10000005
    using namespace std;
      
    // Sieve table: prime[k] stays true while k is still considered prime.
    // It holds N entries, so the valid indices are 0 .. N - 1.
    vector<bool> prime(N, true);
      
    // Sieve implemented to find Prime
    // Number
    void sieveOfEratosthenes()
    {
        for (int i = 2; i <= sqrt(N); ++i) {
            if (prime[i]) {
                for (int j = i * i; j <= N; j += i) {
                    prime[j] = false;
                }
            }
        }
    }
      
    // Driver Code
    int main()
    {
        // Intialise clock to calculate
        // time required to execute without
        // optimization
        clock_t start, end;
      
        // Start clock
        start = clock();
      
        // Function call to find Prime Numbers
        sieveOfEratosthenes();
      
        // End clock
        end = clock();
      
        // Calculate the time difference
        double time_taken
            = double(end - start)
              / double(CLOCKS_PER_SEC);
      
        // Print the Calculated execution time
        cout << "Execution time: " << time_taken
             << " secs.";
      
        return 0;
    }
    
    输出:
    Execution time: 0.303287 secs.
    

为了进一步实现体系结构级别的优化,我们可以在编译指示中使用target。这些优化可以产生令人惊讶的效果。不过,建议将target与上述某一种优化级别搭配使用。
以下是使用target的先前程序的实现:

// C++ program to calculate the Prime 
// Numbers upto 10000000 using Sieve 
// of Eratosthenes with Ofast optimization along with target optimizations 
  
// To see the working of controlled 
// optimization "Ofast" 
#pragma GCC optimize("Ofast") 
#pragma GCC target("avx,avx2,fma")
  
#include <cmath>
#include <ctime>
#include <iostream>
#include <vector>
#define N 10000005
using namespace std;
  
// Sieve table: prime[k] stays true while k is still considered prime.
// It holds N entries, so the valid indices are 0 .. N - 1.
vector<bool> prime(N, true);
  
// Sieve implemented to find Prime 
// Number 
void sieveOfEratosthenes() 
{ 
    for (int i = 2; i <= sqrt(N); ++i) { 
        if (prime[i]) { 
            for (int j = i * i; j <= N; j += i) { 
                prime[j] = false; 
            } 
        } 
    } 
} 
  
// Driver Code 
int main() 
{ 
    // Intialise clock to calculate 
    // time required to execute without 
    // optimization 
    clock_t start, end; 
  
    // Start clock 
    start = clock(); 
  
    // Function call to find Prime Numbers 
    sieveOfEratosthenes(); 
  
    // End clock 
    end = clock(); 
  
    // Calculate the time difference 
    double time_taken 
        = double(end - start) 
        / double(CLOCKS_PER_SEC); 
  
    // Print the Calculated execution time 
    cout << "Execution time: " << time_taken 
        << " secs."; 
  
    return 0; 
} 
输出:
Execution time: 0.292147 secs.

如果您希望与行业专家一起参加现场课程,请参阅《 Geeks现场课程》和《 Geeks现场课程美国》。