1 static char help[] = "Benchmark for MatMatMult() of AIJ matrices using different 2d finite-difference stencils.\n\n"; 2 3 #include <petscmat.h> 4 5 /* Converts 3d grid coordinates (i,j,k) for a grid of size m \times n to global indexing. Pass k = 0 for a 2d grid. */ 6 int global_index(PetscInt i, PetscInt j, PetscInt k, PetscInt m, PetscInt n) 7 { 8 return i + j * m + k * m * n; 9 } 10 11 int main(int argc, char **argv) 12 { 13 Mat A, B, C, PtAP, PtAP_copy, PtAP_squared; 14 PetscInt i, M, N, Istart, Iend, n = 7, j, J, Ii, m = 8, k, o = 1; 15 PetscScalar v; 16 PetscBool equal = PETSC_FALSE, mat_view = PETSC_FALSE; 17 char stencil[PETSC_MAX_PATH_LEN]; 18 PetscLogStage fullMatMatMultStage; 19 20 PetscFunctionBeginUser; 21 PetscCall(PetscInitialize(&argc, &argv, (char *)0, help)); 22 PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL)); 23 PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL)); 24 PetscCall(PetscOptionsGetInt(NULL, NULL, "-o", &o, NULL)); 25 PetscCall(PetscOptionsHasName(NULL, NULL, "-result_view", &mat_view)); 26 PetscCall(PetscOptionsGetString(NULL, NULL, "-stencil", stencil, sizeof(stencil), NULL)); 27 28 /* Create a aij matrix A */ 29 M = N = m * n * o; 30 PetscCall(MatCreate(PETSC_COMM_WORLD, &A)); 31 PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N)); 32 PetscCall(MatSetType(A, MATAIJ)); 33 PetscCall(MatSetFromOptions(A)); 34 35 /* Consistency checks */ 36 PetscCheck(o >= 1 && m > 1 && n >= 1, PETSC_COMM_WORLD, PETSC_ERR_USER, "Dimensions need to be larger than zero!"); 37 38 /************ 2D stencils ***************/ 39 PetscCall(PetscStrcmp(stencil, "2d5point", &equal)); 40 if (equal) { /* 5-point stencil, 2D */ 41 PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 5, NULL)); 42 PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL)); 43 PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); 44 for (Ii = Istart; Ii < Iend; Ii++) { 45 v = -1.0; 46 k = Ii / (m * n); 47 j = (Ii - k * m * n) / m; 48 i = (Ii - k * m * n - j * m); 49 if (i > 0) { 50 J = global_index(i - 1, j, k, m, n); 51 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 52 } 53 if (i < m - 1) { 54 J = global_index(i + 1, j, k, m, n); 55 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 56 } 57 if (j > 0) { 58 J = global_index(i, j - 1, k, m, n); 59 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 60 } 61 if (j < n - 1) { 62 J = global_index(i, j + 1, k, m, n); 63 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 64 } 65 v = 4.0; 66 PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); 67 } 68 } 69 PetscCall(PetscStrcmp(stencil, "2d9point", &equal)); 70 if (equal) { /* 9-point stencil, 2D */ 71 PetscCall(MatMPIAIJSetPreallocation(A, 9, NULL, 9, NULL)); 72 PetscCall(MatSeqAIJSetPreallocation(A, 9, NULL)); 73 PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); 74 for (Ii = Istart; Ii < Iend; Ii++) { 75 v = -1.0; 76 k = Ii / (m * n); 77 j = (Ii - k * m * n) / m; 78 i = (Ii - k * m * n - j * m); 79 if (i > 0) { 80 J = global_index(i - 1, j, k, m, n); 81 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 82 } 83 if (i > 0 && j > 0) { 84 J = global_index(i - 1, j - 1, k, m, n); 85 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 86 } 87 if (j > 0) { 88 J = global_index(i, j - 1, k, m, n); 89 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 90 } 91 if (i < m - 1 && j > 0) { 92 J = global_index(i + 1, j - 1, k, m, n); 93 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 94 } 95 if (i < m - 1) { 96 J = global_index(i + 1, j, k, m, n); 97 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 98 } 99 if (i < m - 1 && j < n - 1) { 100 J = global_index(i + 1, j + 1, k, m, n); 101 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 102 } 103 if (j < n - 1) { 104 J = global_index(i, j + 1, k, m, n); 105 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 106 } 107 if (i > 0 && j < n - 1) { 108 J = global_index(i - 1, j + 1, k, m, n); 109 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 110 } 111 v = 8.0; 112 PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); 113 } 114 } 115 PetscCall(PetscStrcmp(stencil, "2d9point2", &equal)); 116 if (equal) { /* 9-point Cartesian stencil (width 2 per coordinate), 2D */ 117 PetscCall(MatMPIAIJSetPreallocation(A, 9, NULL, 9, NULL)); 118 PetscCall(MatSeqAIJSetPreallocation(A, 9, NULL)); 119 PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); 120 for (Ii = Istart; Ii < Iend; Ii++) { 121 v = -1.0; 122 k = Ii / (m * n); 123 j = (Ii - k * m * n) / m; 124 i = (Ii - k * m * n - j * m); 125 if (i > 0) { 126 J = global_index(i - 1, j, k, m, n); 127 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 128 } 129 if (i > 1) { 130 J = global_index(i - 2, j, k, m, n); 131 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 132 } 133 if (i < m - 1) { 134 J = global_index(i + 1, j, k, m, n); 135 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 136 } 137 if (i < m - 2) { 138 J = global_index(i + 2, j, k, m, n); 139 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 140 } 141 if (j > 0) { 142 J = global_index(i, j - 1, k, m, n); 143 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 144 } 145 if (j > 1) { 146 J = global_index(i, j - 2, k, m, n); 147 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 148 } 149 if (j < n - 1) { 150 J = global_index(i, j + 1, k, m, n); 151 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 152 } 153 if (j < n - 2) { 154 J = global_index(i, j + 2, k, m, n); 155 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 156 } 157 v = 8.0; 158 PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); 159 } 160 } 161 PetscCall(PetscStrcmp(stencil, "2d13point", &equal)); 162 if (equal) { /* 13-point Cartesian stencil (width 3 per coordinate), 2D */ 163 PetscCall(MatMPIAIJSetPreallocation(A, 13, NULL, 13, NULL)); 164 PetscCall(MatSeqAIJSetPreallocation(A, 13, NULL)); 165 PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); 166 for (Ii = Istart; Ii < Iend; Ii++) { 167 v = -1.0; 168 k = Ii / (m * n); 169 j = (Ii - k * m * n) / m; 170 i = (Ii - k * m * n - j * m); 171 if (i > 0) { 172 J = global_index(i - 1, j, k, m, n); 173 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 174 } 175 if (i > 1) { 176 J = global_index(i - 2, j, k, m, n); 177 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 178 } 179 if (i > 2) { 180 J = global_index(i - 3, j, k, m, n); 181 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 182 } 183 if (i < m - 1) { 184 J = global_index(i + 1, j, k, m, n); 185 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 186 } 187 if (i < m - 2) { 188 J = global_index(i + 2, j, k, m, n); 189 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 190 } 191 if (i < m - 3) { 192 J = global_index(i + 3, j, k, m, n); 193 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 194 } 195 if (j > 0) { 196 J = global_index(i, j - 1, k, m, n); 197 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 198 } 199 if (j > 1) { 200 J = global_index(i, j - 2, k, m, n); 201 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 202 } 203 if (j > 2) { 204 J = global_index(i, j - 3, k, m, n); 205 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 206 } 207 if (j < n - 1) { 208 J = global_index(i, j + 1, k, m, n); 209 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 210 } 211 if (j < n - 2) { 212 J = global_index(i, j + 2, k, m, n); 213 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 214 } 215 if (j < n - 3) { 216 J = global_index(i, j + 3, k, m, n); 217 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 218 } 219 v = 12.0; 220 PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); 221 } 222 } 223 /************ 3D stencils ***************/ 224 PetscCall(PetscStrcmp(stencil, "3d7point", &equal)); 225 if (equal) { /* 7-point stencil, 3D */ 226 PetscCall(MatMPIAIJSetPreallocation(A, 7, NULL, 7, NULL)); 227 PetscCall(MatSeqAIJSetPreallocation(A, 7, NULL)); 228 PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); 229 for (Ii = Istart; Ii < Iend; Ii++) { 230 v = -1.0; 231 k = Ii / (m * n); 232 j = (Ii - k * m * n) / m; 233 i = (Ii - k * m * n - j * m); 234 if (i > 0) { 235 J = global_index(i - 1, j, k, m, n); 236 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 237 } 238 if (i < m - 1) { 239 J = global_index(i + 1, j, k, m, n); 240 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 241 } 242 if (j > 0) { 243 J = global_index(i, j - 1, k, m, n); 244 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 245 } 246 if (j < n - 1) { 247 J = global_index(i, j + 1, k, m, n); 248 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 249 } 250 if (k > 0) { 251 J = global_index(i, j, k - 1, m, n); 252 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 253 } 254 if (k < o - 1) { 255 J = global_index(i, j, k + 1, m, n); 256 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 257 } 258 v = 6.0; 259 PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); 260 } 261 } 262 PetscCall(PetscStrcmp(stencil, "3d13point", &equal)); 263 if (equal) { /* 13-point stencil, 3D */ 264 PetscCall(MatMPIAIJSetPreallocation(A, 13, NULL, 13, NULL)); 265 PetscCall(MatSeqAIJSetPreallocation(A, 13, NULL)); 266 PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); 267 for (Ii = Istart; Ii < Iend; Ii++) { 268 v = -1.0; 269 k = Ii / (m * n); 270 j = (Ii - k * m * n) / m; 271 i = (Ii - k * m * n - j * m); 272 if (i > 0) { 273 J = global_index(i - 1, j, k, m, n); 274 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 275 } 276 if (i > 1) { 277 J = global_index(i - 2, j, k, m, n); 278 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 279 } 280 if (i < m - 1) { 281 J = global_index(i + 1, j, k, m, n); 282 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 283 } 284 if (i < m - 2) { 285 J = global_index(i + 2, j, k, m, n); 286 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 287 } 288 if (j > 0) { 289 J = global_index(i, j - 1, k, m, n); 290 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 291 } 292 if (j > 1) { 293 J = global_index(i, j - 2, k, m, n); 294 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 295 } 296 if (j < n - 1) { 297 J = global_index(i, j + 1, k, m, n); 298 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 299 } 300 if (j < n - 2) { 301 J = global_index(i, j + 2, k, m, n); 302 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 303 } 304 if (k > 0) { 305 J = global_index(i, j, k - 1, m, n); 306 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 307 } 308 if (k > 1) { 309 J = global_index(i, j, k - 2, m, n); 310 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 311 } 312 if (k < o - 1) { 313 J = global_index(i, j, k + 1, m, n); 314 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 315 } 316 if (k < o - 2) { 317 J = global_index(i, j, k + 2, m, n); 318 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 319 } 320 v = 12.0; 321 PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); 322 } 323 } 324 PetscCall(PetscStrcmp(stencil, "3d19point", &equal)); 325 if (equal) { /* 19-point stencil, 3D */ 326 PetscCall(MatMPIAIJSetPreallocation(A, 19, NULL, 19, NULL)); 327 PetscCall(MatSeqAIJSetPreallocation(A, 19, NULL)); 328 PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); 329 for (Ii = Istart; Ii < Iend; Ii++) { 330 v = -1.0; 331 k = Ii / (m * n); 332 j = (Ii - k * m * n) / m; 333 i = (Ii - k * m * n - j * m); 334 /* one hop */ 335 if (i > 0) { 336 J = global_index(i - 1, j, k, m, n); 337 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 338 } 339 if (i < m - 1) { 340 J = global_index(i + 1, j, k, m, n); 341 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 342 } 343 if (j > 0) { 344 J = global_index(i, j - 1, k, m, n); 345 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 346 } 347 if (j < n - 1) { 348 J = global_index(i, j + 1, k, m, n); 349 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 350 } 351 if (k > 0) { 352 J = global_index(i, j, k - 1, m, n); 353 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 354 } 355 if (k < o - 1) { 356 J = global_index(i, j, k + 1, m, n); 357 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 358 } 359 /* two hops */ 360 if (i > 0 && j > 0) { 361 J = global_index(i - 1, j - 1, k, m, n); 362 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 363 } 364 if (i > 0 && k > 0) { 365 J = global_index(i - 1, j, k - 1, m, n); 366 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 367 } 368 if (i > 0 && j < n - 1) { 369 J = global_index(i - 1, j + 1, k, m, n); 370 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 371 } 372 if (i > 0 && k < o - 1) { 373 J = global_index(i - 1, j, k + 1, m, n); 374 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 375 } 376 if (i < m - 1 && j > 0) { 377 J = global_index(i + 1, j - 1, k, m, n); 378 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 379 } 380 if (i < m - 1 && k > 0) { 381 J = global_index(i + 1, j, k - 1, m, n); 382 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 383 } 384 if (i < m - 1 && j < n - 1) { 385 J = global_index(i + 1, j + 1, k, m, n); 386 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 387 } 388 if (i < m - 1 && k < o - 1) { 389 J = global_index(i + 1, j, k + 1, m, n); 390 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 391 } 392 if (j > 0 && k > 0) { 393 J = global_index(i, j - 1, k - 1, m, n); 394 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 395 } 396 if (j > 0 && k < o - 1) { 397 J = global_index(i, j - 1, k + 1, m, n); 398 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 399 } 400 if (j < n - 1 && k > 0) { 401 J = global_index(i, j + 1, k - 1, m, n); 402 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 403 } 404 if (j < n - 1 && k < o - 1) { 405 J = global_index(i, j + 1, k + 1, m, n); 406 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 407 } 408 v = 18.0; 409 PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); 410 } 411 } 412 PetscCall(PetscStrcmp(stencil, "3d27point", &equal)); 413 if (equal) { /* 27-point stencil, 3D */ 414 PetscCall(MatMPIAIJSetPreallocation(A, 27, NULL, 27, NULL)); 415 PetscCall(MatSeqAIJSetPreallocation(A, 27, NULL)); 416 PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); 417 for (Ii = Istart; Ii < Iend; Ii++) { 418 v = -1.0; 419 k = Ii / (m * n); 420 j = (Ii - k * m * n) / m; 421 i = (Ii - k * m * n - j * m); 422 if (k > 0) { 423 if (j > 0) { 424 if (i > 0) { 425 J = global_index(i - 1, j - 1, k - 1, m, n); 426 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 427 } 428 J = global_index(i, j - 1, k - 1, m, n); 429 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 430 if (i < m - 1) { 431 J = global_index(i + 1, j - 1, k - 1, m, n); 432 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 433 } 434 } 435 { 436 if (i > 0) { 437 J = global_index(i - 1, j, k - 1, m, n); 438 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 439 } 440 J = global_index(i, j, k - 1, m, n); 441 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 442 if (i < m - 1) { 443 J = global_index(i + 1, j, k - 1, m, n); 444 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 445 } 446 } 447 if (j < n - 1) { 448 if (i > 0) { 449 J = global_index(i - 1, j + 1, k - 1, m, n); 450 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 451 } 452 J = global_index(i, j + 1, k - 1, m, n); 453 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 454 if (i < m - 1) { 455 J = global_index(i + 1, j + 1, k - 1, m, n); 456 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 457 } 458 } 459 } 460 { 461 if (j > 0) { 462 if (i > 0) { 463 J = global_index(i - 1, j - 1, k, m, n); 464 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 465 } 466 J = global_index(i, j - 1, k, m, n); 467 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 468 if (i < m - 1) { 469 J = global_index(i + 1, j - 1, k, m, n); 470 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 471 } 472 } 473 { 474 if (i > 0) { 475 J = global_index(i - 1, j, k, m, n); 476 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 477 } 478 J = global_index(i, j, k, m, n); 479 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 480 if (i < m - 1) { 481 J = global_index(i + 1, j, k, m, n); 482 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 483 } 484 } 485 if (j < n - 1) { 486 if (i > 0) { 487 J = global_index(i - 1, j + 1, k, m, n); 488 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 489 } 490 J = global_index(i, j + 1, k, m, n); 491 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 492 if (i < m - 1) { 493 J = global_index(i + 1, j + 1, k, m, n); 494 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 495 } 496 } 497 } 498 if (k < o - 1) { 499 if (j > 0) { 500 if (i > 0) { 501 J = global_index(i - 1, j - 1, k + 1, m, n); 502 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 503 } 504 J = global_index(i, j - 1, k + 1, m, n); 505 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 506 if (i < m - 1) { 507 J = global_index(i + 1, j - 1, k + 1, m, n); 508 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 509 } 510 } 511 { 512 if (i > 0) { 513 J = global_index(i - 1, j, k + 1, m, n); 514 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 515 } 516 J = global_index(i, j, k + 1, m, n); 517 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 518 if (i < m - 1) { 519 J = global_index(i + 1, j, k + 1, m, n); 520 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 521 } 522 } 523 if (j < n - 1) { 524 if (i > 0) { 525 J = global_index(i - 1, j + 1, k + 1, m, n); 526 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 527 } 528 J = global_index(i, j + 1, k + 1, m, n); 529 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 530 if (i < m - 1) { 531 J = global_index(i + 1, j + 1, k + 1, m, n); 532 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 533 } 534 } 535 } 536 v = 26.0; 537 PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); 538 } 539 } 540 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 541 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 542 543 /* Copy A into B in order to have a more representative benchmark (A*A has more cache hits than A*B) */ 544 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 545 546 PetscCall(PetscLogStageRegister("Full MatMatMult", &fullMatMatMultStage)); 547 548 /* Test C = A*B */ 549 PetscCall(PetscLogStagePush(fullMatMatMultStage)); 550 PetscCall(MatMatMult(A, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &C)); 551 552 /* Test PtAP_squared = PtAP(C,C)*PtAP(C,C) */ 553 PetscCall(MatPtAP(C, C, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &PtAP)); 554 PetscCall(MatDuplicate(PtAP, MAT_COPY_VALUES, &PtAP_copy)); 555 PetscCall(MatMatMult(PtAP, PtAP_copy, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &PtAP_squared)); 556 557 PetscCall(MatView(C, PETSC_VIEWER_STDOUT_WORLD)); 558 PetscCall(MatView(PtAP_squared, PETSC_VIEWER_STDOUT_WORLD)); 559 560 PetscCall(MatDestroy(&PtAP_squared)); 561 PetscCall(MatDestroy(&PtAP_copy)); 562 PetscCall(MatDestroy(&PtAP)); 563 PetscCall(MatDestroy(&C)); 564 PetscCall(MatDestroy(&B)); 565 PetscCall(MatDestroy(&A)); 566 PetscCall(PetscFinalize()); 567 return 0; 568 } 569 570 /*TEST 571 572 test: 573 suffix: 1 574 nsize: 1 575 args: -m 8 -n 8 -stencil 2d5point -matmatmult_via sorted 576 577 test: 578 suffix: 2 579 nsize: 1 580 args: -m 5 -n 5 -o 5 -stencil 3d27point -matmatmult_via rowmerge 581 582 test: 583 suffix: 3 584 nsize: 4 585 args: -m 6 -n 6 -stencil 2d5point -matmatmult_via seqmpi 586 587 TEST*/ 588