彼得森（Peterson）互斥算法 第 2 部分（CPU 周期和内存栅栏）

问题：给定2个进程i和j，你需要编写一个程序，在没有任何额外硬件支持的情况下，可以保证两者之间互斥。

我们强烈建议参考以下在上一篇文章中讨论的基本解决方案。
彼得森（Peterson）互斥算法 第 1 部分
我们将解决之前算法中的两个问题。

CPU时钟周期的浪费

通俗地说，当一个线程在等待轮到自己时，它会陷入一个很长的 while 循环，每秒对条件进行数百万次测试，从而做了大量不必要的计算。有一种更好的等待方式，称为“让出”（yield）。

要了解它的作用，我们需要深入研究进程调度程序在 Linux 中的工作原理。这里提到的想法是调度程序的简化版本，实际实现有很多复杂性。

考虑下面的例子，
共有三个进程：P1、P2 和 P3。进程 P3 中有一个类似于我们代码中的 while 循环，它不做任何有用的计算，并且只有在 P2 执行完毕后才会退出循环。调度程序将它们全部放入一个循环队列中。现在，假设处理器的时钟频率为每秒 1000000 个周期，并且在每一轮中为每个进程分配 100 个时钟周期。那么，首先 P1 运行 100 个时钟周期（0.0001 秒），然后是 P2（0.0001 秒），接着是 P3（0.0001 秒）；由于没有其他进程，这一轮转会不断重复，直到 P2 结束，之后 P3 才继续执行并最终终止。

这样，P3 每个时间片的 100 个 CPU 时钟周期都被完全浪费了。为了避免这种情况，我们主动放弃 CPU 时间片，即让出（yield）：它实质上会结束当前时间片，由调度器选择下一个进程运行。现在，我们只测试一次条件，然后就放弃 CPU。假设这次测试需要 25 个时钟周期，那么我们在一个时间片中就节省了 75% 的计算量。用图形来表示，

考虑到处理器时钟速度为 1MHz，这节省了很多！
不同的发行版提供不同的函数来实现此功能。 Linux 提供sched_yield() 。

C

/*
 * Acquire the two-thread Peterson lock on behalf of thread `self`,
 * giving up the CPU between polls instead of spinning on it.
 *
 * self: index of the calling thread. The other thread is addressed as
 *       1-self, so the code expects 0 or 1.
 */
void lock(int self)
{
    int other = 1 - self;

    // Announce interest, then hand the tie-break to the other thread.
    flag[self] = 1;
    turn = other;

    // Keep polling while the other thread wants the lock and still
    // holds the tie-break; sched_yield() is called between polls.
    for (;;) {
        if (flag[other] != 1 || turn != other)
            break;
        sched_yield();
    }
}

C

// Busy-wait for as long as f reads as 0.
while (f == 0);
 
// Memory fence required here: without it, the read of x below may be
// reordered ahead of the loop's final read of f.
print x;

C

// Filename: peterson_yieldlock_memoryfence.c
// Use below command to compile:
// gcc -pthread peterson_yieldlock_memoryfence.c -o peterson_yieldlock_memoryfence
 
#include
#include
#include "mythreads.h"
 
// Shared state of the two-thread lock.
int turn;       // tie-break value written by lock_init() and lock()
int flag[2];    // flag[t] is set to 1 by lock(t) and cleared by unlock(t)

// NOTE(review): presumably the shared counter and the per-thread
// iteration count used by the thread function -- confirm against func().
int ans = 0;
const int MAX = 1000000000;
 
/*
 * Put the lock into its initial state: neither thread is requesting it
 * (both flags cleared) and the tie-break value `turn` is 0.
 */
void lock_init()
{
    turn = 0;

    flag[1] = 0;
    flag[0] = 0;
}
 
/*
 * Acquire the two-thread Peterson lock on behalf of thread `self`.
 * Executed before entering the critical section.
 *
 * self: index of the calling thread. The other thread is addressed as
 *       1-self, so the value must be 0 or 1.
 *
 * Returns once the other thread is not requesting the lock, or the
 * tie-break value `turn` no longer points at the other thread.
 */
void lock(int self)
{
    int other = 1 - self;

    // Say that we want the lock ...
    flag[self] = 1;

    // ... but give the other thread the chance to go first.
    turn = other;

    // Full barrier: the two stores above must not be reordered with
    // the loads in the wait loop below.
    __sync_synchronize();

    // Wait until the other thread stops requesting the lock or the
    // tie-break moves to us; yield the CPU between polls.
    while (flag[other] == 1 && turn == other) {
        sched_yield();
    }

    // Full barrier on the way in: keeps the caller's critical-section
    // accesses from being moved ahead of the loads that decided the
    // lock was acquired.
    __sync_synchronize();
}
 
/*
 * Release the lock held by thread `self`.
 * Executed after leaving the critical section.
 *
 * self: index of the calling thread (0 or 1), used to index flag[].
 */
void unlock(int self)
{
    // Full barrier: everything the caller read or wrote inside the
    // critical section must be complete before the flag is cleared,
    // otherwise the other thread could enter while those accesses are
    // still in flight.
    __sync_synchronize();

    // Withdraw the request; this lets the other thread's wait loop exit.
    flag[self] = 0;
}
 
// A Sample function run by two threads created
// in main()
void* func(void *s)
{
    int i = 0;
    int self = (int *)s;
    printf("Thread Entered: %d\n",self);
    lock(self);
 
    // Critical section (Only one thread
    // can enter here at a time)
    for (i=0; i


C
// mythreads.h (A wrapper header file with assert
// statements)
#ifndef __MYTHREADS_h__
#define __MYTHREADS_h__
 
#include 
#include 
#include 
 
/*
 * Lock mutex `m` with pthread_mutex_lock() and assert that the call
 * returned 0.
 *
 * Declared `static inline` because this definition sits in a header:
 * with external linkage, including the header from more than one
 * source file would produce duplicate-symbol link errors.
 */
static inline void Pthread_mutex_lock(pthread_mutex_t *m)
{
    int rc = pthread_mutex_lock(m);
    assert(rc == 0);
    (void)rc;   // rc is otherwise unused when NDEBUG removes the assert
}
                                                                                 
/*
 * Unlock mutex `m` with pthread_mutex_unlock() and assert that the
 * call returned 0.
 *
 * Declared `static inline` because this definition sits in a header:
 * with external linkage, including the header from more than one
 * source file would produce duplicate-symbol link errors.
 */
static inline void Pthread_mutex_unlock(pthread_mutex_t *m)
{
    int rc = pthread_mutex_unlock(m);
    assert(rc == 0);
    (void)rc;   // rc is otherwise unused when NDEBUG removes the assert
}
                                                                                 
/*
 * Start a new thread with pthread_create() and assert that the call
 * returned 0. All four arguments are forwarded unchanged.
 *
 * thread:        receives the new thread's identifier
 * attr:          thread attributes, passed straight through
 * start_routine: function the new thread runs
 * arg:           argument handed to start_routine
 *
 * Declared `static inline` because this definition sits in a header:
 * with external linkage, including the header from more than one
 * source file would produce duplicate-symbol link errors.
 */
static inline void Pthread_create(pthread_t *thread, const pthread_attr_t *attr,
           void *(*start_routine)(void*), void *arg)
{
    int rc = pthread_create(thread, attr, start_routine, arg);
    assert(rc == 0);
    (void)rc;   // rc is otherwise unused when NDEBUG removes the assert
}
 
/*
 * Wait for `thread` with pthread_join() and assert that the call
 * returned 0.
 *
 * thread:    thread to wait for
 * value_ptr: passed straight through to pthread_join()
 *
 * Declared `static inline` because this definition sits in a header:
 * with external linkage, including the header from more than one
 * source file would produce duplicate-symbol link errors.
 */
static inline void Pthread_join(pthread_t thread, void **value_ptr)
{
    int rc = pthread_join(thread, value_ptr);
    assert(rc == 0);
    (void)rc;   // rc is otherwise unused when NDEBUG removes the assert
}
 
#endif // __MYTHREADS_h__

`内存栅栏`

早期教程中的代码可能适用于大多数系统，但并非 100% 正确。逻辑是完美的，但大多数现代 CPU 采用的性能优化可能会导致乱序执行。这种内存操作（加载和存储）的重新排序通常不会在单个执行线程中被注意到，但可能会导致并发程序中出现不可预测的行为。考虑这个例子，

`C`

// Busy-wait for as long as f reads as 0.
while (f == 0);
 
// Memory fence required here: without it, the read of x below may be
// reordered ahead of the loop's final read of f.
print x;

在上面的例子中，编译器认为这两条语句是相互独立的，因此试图通过重新排序来提高代码效率，这可能会导致并发程序出现问题。为了避免这种情况，我们放置了一个内存栅栏来向编译器提示跨越栅栏的语句之间可能的关系。

因此，下面这些语句的顺序

flag[self] = 1; turn = 1-self; while (turn condition check) yield();

必须完全保持不变，锁才能正常工作，否则最终会陷入死锁状态。

为确保这一点，编译器提供了一条指令，用于防止语句跨越该屏障被重新排序。在 gcc 中，这条指令是 __sync_synchronize()。修改后的代码如下，这是用 C 编写的完整实现：

`C`

// Filename: peterson_yieldlock_memoryfence.c
// Use below command to compile:
// gcc -pthread peterson_yieldlock_memoryfence.c -o peterson_yieldlock_memoryfence
 
#include
#include
#include "mythreads.h"
 
// Shared state of the two-thread lock.
int turn;       // tie-break value written by lock_init() and lock()
int flag[2];    // flag[t] is set to 1 by lock(t) and cleared by unlock(t)

// NOTE(review): presumably the shared counter and the per-thread
// iteration count used by the thread function -- confirm against func().
int ans = 0;
const int MAX = 1000000000;
 
/*
 * Put the lock into its initial state: neither thread is requesting it
 * (both flags cleared) and the tie-break value `turn` is 0.
 */
void lock_init()
{
    turn = 0;

    flag[1] = 0;
    flag[0] = 0;
}
 
/*
 * Acquire the two-thread Peterson lock on behalf of thread `self`.
 * Executed before entering the critical section.
 *
 * self: index of the calling thread. The other thread is addressed as
 *       1-self, so the value must be 0 or 1.
 *
 * Returns once the other thread is not requesting the lock, or the
 * tie-break value `turn` no longer points at the other thread.
 */
void lock(int self)
{
    int other = 1 - self;

    // Say that we want the lock ...
    flag[self] = 1;

    // ... but give the other thread the chance to go first.
    turn = other;

    // Full barrier: the two stores above must not be reordered with
    // the loads in the wait loop below.
    __sync_synchronize();

    // Wait until the other thread stops requesting the lock or the
    // tie-break moves to us; yield the CPU between polls.
    while (flag[other] == 1 && turn == other) {
        sched_yield();
    }

    // Full barrier on the way in: keeps the caller's critical-section
    // accesses from being moved ahead of the loads that decided the
    // lock was acquired.
    __sync_synchronize();
}
 
/*
 * Release the lock held by thread `self`.
 * Executed after leaving the critical section.
 *
 * self: index of the calling thread (0 or 1), used to index flag[].
 */
void unlock(int self)
{
    // Full barrier: everything the caller read or wrote inside the
    // critical section must be complete before the flag is cleared,
    // otherwise the other thread could enter while those accesses are
    // still in flight.
    __sync_synchronize();

    // Withdraw the request; this lets the other thread's wait loop exit.
    flag[self] = 0;
}
 
// A Sample function run by two threads created
// in main()
void* func(void *s)
{
    int i = 0;
    int self = (int *)s;
    printf("Thread Entered: %d\n",self);
    lock(self);
 
    // Critical section (Only one thread
    // can enter here at a time)
    for (i=0; i

`C`

// mythreads.h (A wrapper header file with assert
// statements)
#ifndef __MYTHREADS_h__
#define __MYTHREADS_h__
 
#include 
#include 
#include 
 
/*
 * Lock mutex `m` with pthread_mutex_lock() and assert that the call
 * returned 0.
 *
 * Declared `static inline` because this definition sits in a header:
 * with external linkage, including the header from more than one
 * source file would produce duplicate-symbol link errors.
 */
static inline void Pthread_mutex_lock(pthread_mutex_t *m)
{
    int rc = pthread_mutex_lock(m);
    assert(rc == 0);
    (void)rc;   // rc is otherwise unused when NDEBUG removes the assert
}
                                                                                 
/*
 * Unlock mutex `m` with pthread_mutex_unlock() and assert that the
 * call returned 0.
 *
 * Declared `static inline` because this definition sits in a header:
 * with external linkage, including the header from more than one
 * source file would produce duplicate-symbol link errors.
 */
static inline void Pthread_mutex_unlock(pthread_mutex_t *m)
{
    int rc = pthread_mutex_unlock(m);
    assert(rc == 0);
    (void)rc;   // rc is otherwise unused when NDEBUG removes the assert
}
                                                                                 
/*
 * Start a new thread with pthread_create() and assert that the call
 * returned 0. All four arguments are forwarded unchanged.
 *
 * thread:        receives the new thread's identifier
 * attr:          thread attributes, passed straight through
 * start_routine: function the new thread runs
 * arg:           argument handed to start_routine
 *
 * Declared `static inline` because this definition sits in a header:
 * with external linkage, including the header from more than one
 * source file would produce duplicate-symbol link errors.
 */
static inline void Pthread_create(pthread_t *thread, const pthread_attr_t *attr,
           void *(*start_routine)(void*), void *arg)
{
    int rc = pthread_create(thread, attr, start_routine, arg);
    assert(rc == 0);
    (void)rc;   // rc is otherwise unused when NDEBUG removes the assert
}
 
/*
 * Wait for `thread` with pthread_join() and assert that the call
 * returned 0.
 *
 * thread:    thread to wait for
 * value_ptr: passed straight through to pthread_join()
 *
 * Declared `static inline` because this definition sits in a header:
 * with external linkage, including the header from more than one
 * source file would produce duplicate-symbol link errors.
 */
static inline void Pthread_join(pthread_t thread, void **value_ptr)
{
    int rc = pthread_join(thread, value_ptr);
    assert(rc == 0);
    (void)rc;   // rc is otherwise unused when NDEBUG removes the assert
}
 
#endif // __MYTHREADS_h__

输出：

Thread Entered: 1
Thread Entered: 0
Actual Count: 2000000000 | Expected Count: 2000000000