/*
 * Naive dense matrix multiply: C = A * B.
 *
 * A is M x N, B is N x P and C is M x P; all three are stored row-major in
 * flat arrays, so element (r, c) of an R x S matrix lives at index r * S + c.
 *
 * Every element of C is overwritten; its previous contents are ignored.
 * If M or P is not positive there is nothing to write and the function
 * returns immediately. If N is not positive, C is filled with zeros.
 *
 * Loop order is i-j-k: for each output element the full dot product of
 * row i of A and column j of B is accumulated before moving on.
 */
void matmul_naive(int M, int N, int P, double *A, double *B, double *C)
{
    if (M <= 0 || P <= 0) {
        return;
    }

    /* Do all index arithmetic in size_t so i * P + j cannot overflow int. */
    const size_t rows = (size_t)M;
    const size_t inner = N > 0 ? (size_t)N : 0;
    const size_t cols = (size_t)P;

    for (size_t i = 0; i < rows; i++) {
        for (size_t j = 0; j < cols; j++) {
            /* Accumulate the dot product; starting from 0 replaces the
             * separate "initialize C to 0" pass. */
            double sum = 0.0;
            for (size_t k = 0; k < inner; k++) {
                sum += A[i * inner + k] * B[k * cols + j];
            }
            C[i * cols + j] = sum;
        }
    }
}
/*
 * Benchmark driver: allocates three 512 x 512 matrices of doubles, fills A
 * and B with pseudo-random values in [0, 1], times one call to
 * matmul_naive() with clock(), and prints the elapsed time in seconds.
 *
 * Returns 0 on success, EXIT_FAILURE if any matrix cannot be allocated.
 */
int main(void)
{
    int M = 512, N = 512, P = 512;

    /* Widen to size_t before multiplying so the element counts cannot
     * overflow int if the dimensions are raised later. */
    const size_t a_count = (size_t)M * (size_t)N;
    const size_t b_count = (size_t)N * (size_t)P;
    const size_t c_count = (size_t)M * (size_t)P;

    double *A = malloc(a_count * sizeof *A);
    double *B = malloc(b_count * sizeof *B);
    double *C = malloc(c_count * sizeof *C);
    if (A == NULL || B == NULL || C == NULL) {
        fprintf(stderr, "matrix allocation failed\n");
        /* free(NULL) is a no-op, so releasing all three is always safe. */
        free(A);
        free(B);
        free(C);
        return EXIT_FAILURE;
    }

    /* Random initialization. rand() is never seeded here, so the input
     * data is the same on every run. */
    for (size_t i = 0; i < a_count; i++) {
        A[i] = (double)rand() / RAND_MAX;
    }
    for (size_t i = 0; i < b_count; i++) {
        B[i] = (double)rand() / RAND_MAX;
    }

    clock_t start = clock();
    matmul_naive(M, N, P, A, B, C);
    printf("Naive: %.3f s\n", (double)(clock() - start) / CLOCKS_PER_SEC);

    free(A);
    free(B);
    free(C);
    return 0;
}
/*
 * Dense matrix multiply C = A * B with the loops ordered i-k-j.
 *
 * A is M x N, B is N x P and C is M x P, all row-major in flat arrays.
 * With k as the middle loop and j innermost, the inner loop walks row k of
 * B and row i of C at consecutive indices while the A element stays fixed.
 *
 * Every element of C is overwritten; its previous contents are ignored.
 * If M or P is not positive the function returns without touching C.
 * If N is not positive, C is filled with zeros.
 */
void matmul_opt1(int M, int N, int P, double *A, double *B, double *C)
{
    if (M <= 0 || P <= 0) {
        return;
    }

    /* Do all index arithmetic in size_t so M * P and i * P + j cannot
     * overflow int. */
    const size_t rows = (size_t)M;
    const size_t inner = N > 0 ? (size_t)N : 0;
    const size_t cols = (size_t)P;

    /* C must start at zero because the main loop accumulates into it. */
    for (size_t idx = 0; idx < rows * cols; idx++) {
        C[idx] = 0;
    }

    for (size_t i = 0; i < rows; i++) {
        double *c_row = C + i * cols;
        for (size_t k = 0; k < inner; k++) {
            /* A[i][k] does not depend on j: read it once per inner loop. */
            const double a_ik = A[i * inner + k];
            const double *b_row = B + k * cols;
            for (size_t j = 0; j < cols; j++) {
                c_row[j] += a_ik * b_row[j];
            }
        }
    }
}