使用线程的矩阵乘法

矩阵的乘法确实需要时间。使用普通矩阵乘法的矩阵乘法的时间复杂度为 O(n^3)。 Strassen算法对其进行了改进，其时间复杂度为O(n^(2.8074))。
但是，有什么方法可以提高使用普通方法的矩阵乘法的性能。
可以做多线程来改进它。在多线程中，我们不是使用处理器的单个内核，而是使用所有或更多内核来解决问题。
我们创建不同的线程，每个线程评估矩阵乘法的某些部分。
根据处理器的内核数量，您可以创建所需的线程数。尽管您可以根据需要创建任意数量的线程，但更好的方法是为一个内核创建每个线程。
在第二种方法中，我们为结果矩阵中的每个元素创建一个单独的线程。使用pthread_exit()我们从pthread_join()收集的每个线程返回计算值。这种方法不使用任何全局变量。

例子：

Input : 
Matrix A
 1 0 0
 0 1 0
 0 0 1

Matrix B
 2 3 2
 4 5 1
 7 8 6

Output : Multiplication of A and B
2 3 2
4 5 1
7 8 6

注意* 建议在基于 linux 的系统中执行程序
使用以下代码在 linux 中编译：

g++ -pthread program_name.cpp

CPP

// CPP Program to multiply two matrix using pthreads
#include 
using namespace std;
 
// maximum size of matrix
#define MAX 4
 
// maximum number of threads
#define MAX_THREAD 4
 
int matA[MAX][MAX];
int matB[MAX][MAX];
int matC[MAX][MAX];
int step_i = 0;
 
void* multi(void* arg)
{
    int i = step_i++; //i denotes row number of resultant matC
   
    for (int j = 0; j < MAX; j++)
      for (int k = 0; k < MAX; k++)
        matC[i][j] += matA[i][k] * matB[k][j];
}
 
// Driver Code
int main()
{
    // Generating random values in matA and matB
    for (int i = 0; i < MAX; i++) {
        for (int j = 0; j < MAX; j++) {
            matA[i][j] = rand() % 10;
            matB[i][j] = rand() % 10;
        }
    }
 
    // Displaying matA
    cout << endl
         << "Matrix A" << endl;
    for (int i = 0; i < MAX; i++) {
        for (int j = 0; j < MAX; j++)
            cout << matA[i][j] << " ";
        cout << endl;
    }
 
    // Displaying matB
    cout << endl
         << "Matrix B" << endl;
    for (int i = 0; i < MAX; i++) {
        for (int j = 0; j < MAX; j++)
            cout << matB[i][j] << " ";       
        cout << endl;
    }
 
    // declaring four threads
    pthread_t threads[MAX_THREAD];
 
    // Creating four threads, each evaluating its own part
    for (int i = 0; i < MAX_THREAD; i++) {
        int* p;
        pthread_create(&threads[i], NULL, multi, (void*)(p));
    }
 
    // joining and waiting for all threads to complete
    for (int i = 0; i < MAX_THREAD; i++)
        pthread_join(threads[i], NULL);   
 
    // Displaying the result matrix
    cout << endl
         << "Multiplication of A and B" << endl;
    for (int i = 0; i < MAX; i++) {
        for (int j = 0; j < MAX; j++)
            cout << matC[i][j] << " ";       
        cout << endl;
    }
    return 0;
}

C

// C Program to multiply two matrix using pthreads without
// use of global variables
#include
#include
#include
#include
#define MAX 4
 
 
//Each thread computes single element in the resultant matrix
void *mult(void* arg)
{
    int *data = (int *)arg;
    int k = 0, i = 0;
     
    int x = data[0];
    for (i = 1; i <= x; i++)
           k += data[i]*data[i+x];
     
    int *p = (int*)malloc(sizeof(int));
         *p = k;
     
//Used to terminate a thread and the return value is passed as a pointer
    pthread_exit(p);
}
 
//Driver code
int main()
{
 
    int matA[MAX][MAX];
    int matB[MAX][MAX];
     
     
    int r1=MAX,c1=MAX,r2=MAX,c2=MAX,i,j,k;
 
 
    // Generating random values in matA
    for (i = 0; i < r1; i++)
            for (j = 0; j < c1; j++)
                   matA[i][j] = rand() % 10;
           
        // Generating random values in matB
    for (i = 0; i < r1; i++)
            for (j = 0; j < c1; j++)
                   matB[i][j] = rand() % 10;
    
    // Displaying matA        
    for (i = 0; i < r1; i++){
        for(j = 0; j < c1; j++)
            printf("%d ",matA[i][j]);
        printf("\n");
    }
             
    // Displaying matB               
    for (i = 0; i < r2; i++){
        for(j = 0; j < c2; j++)
            printf("%d ",matB[i][j]);
        printf("\n");   
    }
     
     
    int max = r1*c2;
     
     
    //declaring array of threads of size r1*c2       
    pthread_t *threads;
    threads = (pthread_t*)malloc(max*sizeof(pthread_t));
     
    int count = 0;
    int* data = NULL;
    for (i = 0; i < r1; i++)
        for (j = 0; j < c2; j++)
               {
                
               //storing row and column elements in data
            data = (int *)malloc((20)*sizeof(int));
            data[0] = c1;
     
            for (k = 0; k < c1; k++)
                data[k+1] = matA[i][k];
     
            for (k = 0; k < r2; k++)
                data[k+c1+1] = matB[k][j];
              
             //creating threads
                pthread_create(&threads[count++], NULL,
                               mult, (void*)(data));
                 
                    }
     
    printf("RESULTANT MATRIX IS :- \n");
    for (i = 0; i < max; i++)
    {
      void *k;
       
      //Joining all threads and collecting return value
      pthread_join(threads[i], &k);
            
           
          int *p = (int *)k;
      printf("%d ",*p);
      if ((i + 1) % c2 == 0)
          printf("\n");
    }
 
     
 
  return 0;
}

输出：

Matrix A
3 7 3 6 
9 2 0 3 
0 2 1 7 
2 2 7 9 

Matrix B
6 5 5 2 
1 7 9 6 
6 6 8 9 
0 3 5 2 

Multiplication of A and B
43 100 132 87 
56 68 78 36 
8 41 61 35 
56 93 129 97

不使用全局变量的方法：
注意* 建议在基于 linux 的系统中执行程序
使用以下代码在 linux 中编译：

g++ -pthread program_name.cpp

C

// C Program to multiply two matrix using pthreads without
// use of global variables
#include
#include
#include
#include
#define MAX 4
 
 
//Each thread computes single element in the resultant matrix
void *mult(void* arg)
{
    int *data = (int *)arg;
    int k = 0, i = 0;
     
    int x = data[0];
    for (i = 1; i <= x; i++)
           k += data[i]*data[i+x];
     
    int *p = (int*)malloc(sizeof(int));
         *p = k;
     
//Used to terminate a thread and the return value is passed as a pointer
    pthread_exit(p);
}
 
//Driver code
int main()
{
 
    int matA[MAX][MAX];
    int matB[MAX][MAX];
     
     
    int r1=MAX,c1=MAX,r2=MAX,c2=MAX,i,j,k;
 
 
    // Generating random values in matA
    for (i = 0; i < r1; i++)
            for (j = 0; j < c1; j++)
                   matA[i][j] = rand() % 10;
           
        // Generating random values in matB
    for (i = 0; i < r1; i++)
            for (j = 0; j < c1; j++)
                   matB[i][j] = rand() % 10;
    
    // Displaying matA        
    for (i = 0; i < r1; i++){
        for(j = 0; j < c1; j++)
            printf("%d ",matA[i][j]);
        printf("\n");
    }
             
    // Displaying matB               
    for (i = 0; i < r2; i++){
        for(j = 0; j < c2; j++)
            printf("%d ",matB[i][j]);
        printf("\n");   
    }
     
     
    int max = r1*c2;
     
     
    //declaring array of threads of size r1*c2       
    pthread_t *threads;
    threads = (pthread_t*)malloc(max*sizeof(pthread_t));
     
    int count = 0;
    int* data = NULL;
    for (i = 0; i < r1; i++)
        for (j = 0; j < c2; j++)
               {
                
               //storing row and column elements in data
            data = (int *)malloc((20)*sizeof(int));
            data[0] = c1;
     
            for (k = 0; k < c1; k++)
                data[k+1] = matA[i][k];
     
            for (k = 0; k < r2; k++)
                data[k+c1+1] = matB[k][j];
              
             //creating threads
                pthread_create(&threads[count++], NULL,
                               mult, (void*)(data));
                 
                    }
     
    printf("RESULTANT MATRIX IS :- \n");
    for (i = 0; i < max; i++)
    {
      void *k;
       
      //Joining all threads and collecting return value
      pthread_join(threads[i], &k);
            
           
          int *p = (int *)k;
      printf("%d ",*p);
      if ((i + 1) % c2 == 0)
          printf("\n");
    }
 
     
 
  return 0;
}

输出：

Matrix A
3 7 3 6 
9 2 0 3 
0 2 1 7 
2 2 7 9

Matrix B
6 5 5 2 
1 7 9 6 
6 6 8 9 
0 3 5 2 

Multiplication of A and B
43 100 132 87 
56 68 78 36 
8 41 61 35 
56 93 129 97