1 static char help[] = "Benchmark for MatMatMult() of AIJ matrices using different 2d finite-difference stencils.\n\n"; 2 3 #include <petscmat.h> 4 5 /* Converts 3d grid coordinates (i,j,k) for a grid of size m \times n to global indexing. Pass k = 0 for a 2d grid. */ 6 int global_index(PetscInt i, PetscInt j, PetscInt k, PetscInt m, PetscInt n) 7 { 8 return i + j * m + k * m * n; 9 } 10 11 int main(int argc, char **argv) 12 { 13 Mat A, B, C, PtAP, PtAP_copy, PtAP_squared; 14 PetscInt i, M, N, Istart, Iend, n = 7, j, J, Ii, m = 8, k, o = 1; 15 PetscScalar v; 16 PetscBool equal = PETSC_FALSE, mat_view = PETSC_FALSE; 17 char stencil[PETSC_MAX_PATH_LEN]; 18 #if defined(PETSC_USE_LOG) 19 PetscLogStage fullMatMatMultStage; 20 #endif 21 22 PetscFunctionBeginUser; 23 PetscCall(PetscInitialize(&argc, &argv, (char *)0, help)); 24 PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL)); 25 PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL)); 26 PetscCall(PetscOptionsGetInt(NULL, NULL, "-o", &o, NULL)); 27 PetscCall(PetscOptionsHasName(NULL, NULL, "-result_view", &mat_view)); 28 PetscCall(PetscOptionsGetString(NULL, NULL, "-stencil", stencil, sizeof(stencil), NULL)); 29 30 /* Create a aij matrix A */ 31 M = N = m * n * o; 32 PetscCall(MatCreate(PETSC_COMM_WORLD, &A)); 33 PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N)); 34 PetscCall(MatSetType(A, MATAIJ)); 35 PetscCall(MatSetFromOptions(A)); 36 37 /* Consistency checks */ 38 PetscCheck(o >= 1 && m > 1 && n >= 1, PETSC_COMM_WORLD, PETSC_ERR_USER, "Dimensions need to be larger than zero!"); 39 40 /************ 2D stencils ***************/ 41 PetscCall(PetscStrcmp(stencil, "2d5point", &equal)); 42 if (equal) { /* 5-point stencil, 2D */ 43 PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 5, NULL)); 44 PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL)); 45 PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); 46 for (Ii = Istart; Ii < Iend; Ii++) { 47 v = -1.0; 48 k = Ii / (m * n); 49 j = (Ii - k * m * n) / m; 50 i = (Ii - k * m * n - j * m); 51 if (i > 0) { 52 J = global_index(i - 1, j, k, m, n); 53 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 54 } 55 if (i < m - 1) { 56 J = global_index(i + 1, j, k, m, n); 57 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 58 } 59 if (j > 0) { 60 J = global_index(i, j - 1, k, m, n); 61 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 62 } 63 if (j < n - 1) { 64 J = global_index(i, j + 1, k, m, n); 65 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 66 } 67 v = 4.0; 68 PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); 69 } 70 } 71 PetscCall(PetscStrcmp(stencil, "2d9point", &equal)); 72 if (equal) { /* 9-point stencil, 2D */ 73 PetscCall(MatMPIAIJSetPreallocation(A, 9, NULL, 9, NULL)); 74 PetscCall(MatSeqAIJSetPreallocation(A, 9, NULL)); 75 PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); 76 for (Ii = Istart; Ii < Iend; Ii++) { 77 v = -1.0; 78 k = Ii / (m * n); 79 j = (Ii - k * m * n) / m; 80 i = (Ii - k * m * n - j * m); 81 if (i > 0) { 82 J = global_index(i - 1, j, k, m, n); 83 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 84 } 85 if (i > 0 && j > 0) { 86 J = global_index(i - 1, j - 1, k, m, n); 87 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 88 } 89 if (j > 0) { 90 J = global_index(i, j - 1, k, m, n); 91 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 92 } 93 if (i < m - 1 && j > 0) { 94 J = global_index(i + 1, j - 1, k, m, n); 95 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 96 } 97 if (i < m - 1) { 98 J = global_index(i + 1, j, k, m, n); 99 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 100 } 101 if (i < m - 1 && j < n - 1) { 102 J = global_index(i + 1, j + 1, k, m, n); 103 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 104 } 105 if (j < n - 1) { 106 J = global_index(i, j + 1, k, m, n); 107 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 108 } 109 if (i > 0 && j < n - 1) { 110 J = global_index(i - 1, j + 1, k, m, n); 111 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 112 } 113 v = 8.0; 114 PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); 115 } 116 } 117 PetscCall(PetscStrcmp(stencil, "2d9point2", &equal)); 118 if (equal) { /* 9-point Cartesian stencil (width 2 per coordinate), 2D */ 119 PetscCall(MatMPIAIJSetPreallocation(A, 9, NULL, 9, NULL)); 120 PetscCall(MatSeqAIJSetPreallocation(A, 9, NULL)); 121 PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); 122 for (Ii = Istart; Ii < Iend; Ii++) { 123 v = -1.0; 124 k = Ii / (m * n); 125 j = (Ii - k * m * n) / m; 126 i = (Ii - k * m * n - j * m); 127 if (i > 0) { 128 J = global_index(i - 1, j, k, m, n); 129 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 130 } 131 if (i > 1) { 132 J = global_index(i - 2, j, k, m, n); 133 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 134 } 135 if (i < m - 1) { 136 J = global_index(i + 1, j, k, m, n); 137 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 138 } 139 if (i < m - 2) { 140 J = global_index(i + 2, j, k, m, n); 141 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 142 } 143 if (j > 0) { 144 J = global_index(i, j - 1, k, m, n); 145 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 146 } 147 if (j > 1) { 148 J = global_index(i, j - 2, k, m, n); 149 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 150 } 151 if (j < n - 1) { 152 J = global_index(i, j + 1, k, m, n); 153 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 154 } 155 if (j < n - 2) { 156 J = global_index(i, j + 2, k, m, n); 157 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 158 } 159 v = 8.0; 160 PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); 161 } 162 } 163 PetscCall(PetscStrcmp(stencil, "2d13point", &equal)); 164 if (equal) { /* 13-point Cartesian stencil (width 3 per coordinate), 2D */ 165 PetscCall(MatMPIAIJSetPreallocation(A, 13, NULL, 13, NULL)); 166 PetscCall(MatSeqAIJSetPreallocation(A, 13, NULL)); 167 PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); 168 for (Ii = Istart; Ii < Iend; Ii++) { 169 v = -1.0; 170 k = Ii / (m * n); 171 j = (Ii - k * m * n) / m; 172 i = (Ii - k * m * n - j * m); 173 if (i > 0) { 174 J = global_index(i - 1, j, k, m, n); 175 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 176 } 177 if (i > 1) { 178 J = global_index(i - 2, j, k, m, n); 179 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 180 } 181 if (i > 2) { 182 J = global_index(i - 3, j, k, m, n); 183 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 184 } 185 if (i < m - 1) { 186 J = global_index(i + 1, j, k, m, n); 187 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 188 } 189 if (i < m - 2) { 190 J = global_index(i + 2, j, k, m, n); 191 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 192 } 193 if (i < m - 3) { 194 J = global_index(i + 3, j, k, m, n); 195 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 196 } 197 if (j > 0) { 198 J = global_index(i, j - 1, k, m, n); 199 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 200 } 201 if (j > 1) { 202 J = global_index(i, j - 2, k, m, n); 203 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 204 } 205 if (j > 2) { 206 J = global_index(i, j - 3, k, m, n); 207 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 208 } 209 if (j < n - 1) { 210 J = global_index(i, j + 1, k, m, n); 211 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 212 } 213 if (j < n - 2) { 214 J = global_index(i, j + 2, k, m, n); 215 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 216 } 217 if (j < n - 3) { 218 J = global_index(i, j + 3, k, m, n); 219 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 220 } 221 v = 12.0; 222 PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); 223 } 224 } 225 /************ 3D stencils ***************/ 226 PetscCall(PetscStrcmp(stencil, "3d7point", &equal)); 227 if (equal) { /* 7-point stencil, 3D */ 228 PetscCall(MatMPIAIJSetPreallocation(A, 7, NULL, 7, NULL)); 229 PetscCall(MatSeqAIJSetPreallocation(A, 7, NULL)); 230 PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); 231 for (Ii = Istart; Ii < Iend; Ii++) { 232 v = -1.0; 233 k = Ii / (m * n); 234 j = (Ii - k * m * n) / m; 235 i = (Ii - k * m * n - j * m); 236 if (i > 0) { 237 J = global_index(i - 1, j, k, m, n); 238 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 239 } 240 if (i < m - 1) { 241 J = global_index(i + 1, j, k, m, n); 242 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 243 } 244 if (j > 0) { 245 J = global_index(i, j - 1, k, m, n); 246 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 247 } 248 if (j < n - 1) { 249 J = global_index(i, j + 1, k, m, n); 250 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 251 } 252 if (k > 0) { 253 J = global_index(i, j, k - 1, m, n); 254 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 255 } 256 if (k < o - 1) { 257 J = global_index(i, j, k + 1, m, n); 258 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 259 } 260 v = 6.0; 261 PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); 262 } 263 } 264 PetscCall(PetscStrcmp(stencil, "3d13point", &equal)); 265 if (equal) { /* 13-point stencil, 3D */ 266 PetscCall(MatMPIAIJSetPreallocation(A, 13, NULL, 13, NULL)); 267 PetscCall(MatSeqAIJSetPreallocation(A, 13, NULL)); 268 PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); 269 for (Ii = Istart; Ii < Iend; Ii++) { 270 v = -1.0; 271 k = Ii / (m * n); 272 j = (Ii - k * m * n) / m; 273 i = (Ii - k * m * n - j * m); 274 if (i > 0) { 275 J = global_index(i - 1, j, k, m, n); 276 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 277 } 278 if (i > 1) { 279 J = global_index(i - 2, j, k, m, n); 280 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 281 } 282 if (i < m - 1) { 283 J = global_index(i + 1, j, k, m, n); 284 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 285 } 286 if (i < m - 2) { 287 J = global_index(i + 2, j, k, m, n); 288 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 289 } 290 if (j > 0) { 291 J = global_index(i, j - 1, k, m, n); 292 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 293 } 294 if (j > 1) { 295 J = global_index(i, j - 2, k, m, n); 296 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 297 } 298 if (j < n - 1) { 299 J = global_index(i, j + 1, k, m, n); 300 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 301 } 302 if (j < n - 2) { 303 J = global_index(i, j + 2, k, m, n); 304 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 305 } 306 if (k > 0) { 307 J = global_index(i, j, k - 1, m, n); 308 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 309 } 310 if (k > 1) { 311 J = global_index(i, j, k - 2, m, n); 312 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 313 } 314 if (k < o - 1) { 315 J = global_index(i, j, k + 1, m, n); 316 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 317 } 318 if (k < o - 2) { 319 J = global_index(i, j, k + 2, m, n); 320 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 321 } 322 v = 12.0; 323 PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); 324 } 325 } 326 PetscCall(PetscStrcmp(stencil, "3d19point", &equal)); 327 if (equal) { /* 19-point stencil, 3D */ 328 PetscCall(MatMPIAIJSetPreallocation(A, 19, NULL, 19, NULL)); 329 PetscCall(MatSeqAIJSetPreallocation(A, 19, NULL)); 330 PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); 331 for (Ii = Istart; Ii < Iend; Ii++) { 332 v = -1.0; 333 k = Ii / (m * n); 334 j = (Ii - k * m * n) / m; 335 i = (Ii - k * m * n - j * m); 336 /* one hop */ 337 if (i > 0) { 338 J = global_index(i - 1, j, k, m, n); 339 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 340 } 341 if (i < m - 1) { 342 J = global_index(i + 1, j, k, m, n); 343 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 344 } 345 if (j > 0) { 346 J = global_index(i, j - 1, k, m, n); 347 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 348 } 349 if (j < n - 1) { 350 J = global_index(i, j + 1, k, m, n); 351 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 352 } 353 if (k > 0) { 354 J = global_index(i, j, k - 1, m, n); 355 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 356 } 357 if (k < o - 1) { 358 J = global_index(i, j, k + 1, m, n); 359 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 360 } 361 /* two hops */ 362 if (i > 0 && j > 0) { 363 J = global_index(i - 1, j - 1, k, m, n); 364 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 365 } 366 if (i > 0 && k > 0) { 367 J = global_index(i - 1, j, k - 1, m, n); 368 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 369 } 370 if (i > 0 && j < n - 1) { 371 J = global_index(i - 1, j + 1, k, m, n); 372 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 373 } 374 if (i > 0 && k < o - 1) { 375 J = global_index(i - 1, j, k + 1, m, n); 376 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 377 } 378 if (i < m - 1 && j > 0) { 379 J = global_index(i + 1, j - 1, k, m, n); 380 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 381 } 382 if (i < m - 1 && k > 0) { 383 J = global_index(i + 1, j, k - 1, m, n); 384 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 385 } 386 if (i < m - 1 && j < n - 1) { 387 J = global_index(i + 1, j + 1, k, m, n); 388 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 389 } 390 if (i < m - 1 && k < o - 1) { 391 J = global_index(i + 1, j, k + 1, m, n); 392 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 393 } 394 if (j > 0 && k > 0) { 395 J = global_index(i, j - 1, k - 1, m, n); 396 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 397 } 398 if (j > 0 && k < o - 1) { 399 J = global_index(i, j - 1, k + 1, m, n); 400 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 401 } 402 if (j < n - 1 && k > 0) { 403 J = global_index(i, j + 1, k - 1, m, n); 404 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 405 } 406 if (j < n - 1 && k < o - 1) { 407 J = global_index(i, j + 1, k + 1, m, n); 408 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 409 } 410 v = 18.0; 411 PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); 412 } 413 } 414 PetscCall(PetscStrcmp(stencil, "3d27point", &equal)); 415 if (equal) { /* 27-point stencil, 3D */ 416 PetscCall(MatMPIAIJSetPreallocation(A, 27, NULL, 27, NULL)); 417 PetscCall(MatSeqAIJSetPreallocation(A, 27, NULL)); 418 PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); 419 for (Ii = Istart; Ii < Iend; Ii++) { 420 v = -1.0; 421 k = Ii / (m * n); 422 j = (Ii - k * m * n) / m; 423 i = (Ii - k * m * n - j * m); 424 if (k > 0) { 425 if (j > 0) { 426 if (i > 0) { 427 J = global_index(i - 1, j - 1, k - 1, m, n); 428 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 429 } 430 J = global_index(i, j - 1, k - 1, m, n); 431 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 432 if (i < m - 1) { 433 J = global_index(i + 1, j - 1, k - 1, m, n); 434 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 435 } 436 } 437 { 438 if (i > 0) { 439 J = global_index(i - 1, j, k - 1, m, n); 440 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 441 } 442 J = global_index(i, j, k - 1, m, n); 443 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 444 if (i < m - 1) { 445 J = global_index(i + 1, j, k - 1, m, n); 446 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 447 } 448 } 449 if (j < n - 1) { 450 if (i > 0) { 451 J = global_index(i - 1, j + 1, k - 1, m, n); 452 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 453 } 454 J = global_index(i, j + 1, k - 1, m, n); 455 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 456 if (i < m - 1) { 457 J = global_index(i + 1, j + 1, k - 1, m, n); 458 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 459 } 460 } 461 } 462 { 463 if (j > 0) { 464 if (i > 0) { 465 J = global_index(i - 1, j - 1, k, m, n); 466 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 467 } 468 J = global_index(i, j - 1, k, m, n); 469 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 470 if (i < m - 1) { 471 J = global_index(i + 1, j - 1, k, m, n); 472 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 473 } 474 } 475 { 476 if (i > 0) { 477 J = global_index(i - 1, j, k, m, n); 478 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 479 } 480 J = global_index(i, j, k, m, n); 481 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 482 if (i < m - 1) { 483 J = global_index(i + 1, j, k, m, n); 484 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 485 } 486 } 487 if (j < n - 1) { 488 if (i > 0) { 489 J = global_index(i - 1, j + 1, k, m, n); 490 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 491 } 492 J = global_index(i, j + 1, k, m, n); 493 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 494 if (i < m - 1) { 495 J = global_index(i + 1, j + 1, k, m, n); 496 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 497 } 498 } 499 } 500 if (k < o - 1) { 501 if (j > 0) { 502 if (i > 0) { 503 J = global_index(i - 1, j - 1, k + 1, m, n); 504 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 505 } 506 J = global_index(i, j - 1, k + 1, m, n); 507 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 508 if (i < m - 1) { 509 J = global_index(i + 1, j - 1, k + 1, m, n); 510 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 511 } 512 } 513 { 514 if (i > 0) { 515 J = global_index(i - 1, j, k + 1, m, n); 516 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 517 } 518 J = global_index(i, j, k + 1, m, n); 519 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 520 if (i < m - 1) { 521 J = global_index(i + 1, j, k + 1, m, n); 522 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 523 } 524 } 525 if (j < n - 1) { 526 if (i > 0) { 527 J = global_index(i - 1, j + 1, k + 1, m, n); 528 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 529 } 530 J = global_index(i, j + 1, k + 1, m, n); 531 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 532 if (i < m - 1) { 533 J = global_index(i + 1, j + 1, k + 1, m, n); 534 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); 535 } 536 } 537 } 538 v = 26.0; 539 PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); 540 } 541 } 542 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 543 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 544 545 /* Copy A into B in order to have a more representative benchmark (A*A has more cache hits than A*B) */ 546 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 547 548 PetscCall(PetscLogStageRegister("Full MatMatMult", &fullMatMatMultStage)); 549 550 /* Test C = A*B */ 551 PetscCall(PetscLogStagePush(fullMatMatMultStage)); 552 PetscCall(MatMatMult(A, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &C)); 553 554 /* Test PtAP_squared = PtAP(C,C)*PtAP(C,C) */ 555 PetscCall(MatPtAP(C, C, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &PtAP)); 556 PetscCall(MatDuplicate(PtAP, MAT_COPY_VALUES, &PtAP_copy)); 557 PetscCall(MatMatMult(PtAP, PtAP_copy, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &PtAP_squared)); 558 559 PetscCall(MatView(C, PETSC_VIEWER_STDOUT_WORLD)); 560 PetscCall(MatView(PtAP_squared, PETSC_VIEWER_STDOUT_WORLD)); 561 562 PetscCall(MatDestroy(&PtAP_squared)); 563 PetscCall(MatDestroy(&PtAP_copy)); 564 PetscCall(MatDestroy(&PtAP)); 565 PetscCall(MatDestroy(&C)); 566 PetscCall(MatDestroy(&B)); 567 PetscCall(MatDestroy(&A)); 568 PetscCall(PetscFinalize()); 569 return 0; 570 } 571 572 /*TEST 573 574 test: 575 suffix: 1 576 nsize: 1 577 args: -m 8 -n 8 -stencil 2d5point -matmatmult_via sorted 578 579 test: 580 suffix: 2 581 nsize: 1 582 args: -m 5 -n 5 -o 5 -stencil 3d27point -matmatmult_via rowmerge 583 584 test: 585 suffix: 3 586 nsize: 4 587 args: -m 6 -n 6 -stencil 2d5point -matmatmult_via seqmpi 588 589 TEST*/ 590