1 /* 2 Defines projective product routines where A is a SeqAIJ matrix 3 C = R * A * R^T 4 */ 5 6 #include <../src/mat/impls/aij/seq/aij.h> 7 #include <../src/mat/utils/freespace.h> 8 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/ 9 10 static PetscErrorCode MatDestroy_SeqAIJ_RARt(void *data) 11 { 12 Mat_RARt *rart = (Mat_RARt *)data; 13 14 PetscFunctionBegin; 15 PetscCall(MatTransposeColoringDestroy(&rart->matcoloring)); 16 PetscCall(MatDestroy(&rart->Rt)); 17 PetscCall(MatDestroy(&rart->RARt)); 18 PetscCall(MatDestroy(&rart->ARt)); 19 PetscCall(PetscFree(rart->work)); 20 if (rart->destroy) PetscCall((*rart->destroy)(rart->data)); 21 PetscCall(PetscFree(rart)); 22 PetscFunctionReturn(PETSC_SUCCESS); 23 } 24 25 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(Mat A, Mat R, PetscReal fill, Mat C) 26 { 27 Mat P; 28 Mat_RARt *rart; 29 MatColoring coloring; 30 MatTransposeColoring matcoloring; 31 ISColoring iscoloring; 32 Mat Rt_dense, RARt_dense; 33 34 PetscFunctionBegin; 35 MatCheckProduct(C, 4); 36 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 37 /* create symbolic P=Rt */ 38 PetscCall(MatTransposeSymbolic(R, &P)); 39 40 /* get symbolic C=Pt*A*P */ 41 PetscCall(MatPtAPSymbolic_SeqAIJ_SeqAIJ_SparseAxpy(A, P, fill, C)); 42 PetscCall(MatSetBlockSizes(C, PetscAbs(R->rmap->bs), PetscAbs(R->rmap->bs))); 43 C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart; 44 45 /* create a supporting struct */ 46 PetscCall(PetscNew(&rart)); 47 C->product->data = rart; 48 C->product->destroy = MatDestroy_SeqAIJ_RARt; 49 50 /* Use coloring */ 51 /* inode causes memory problem */ 52 PetscCall(MatSetOption(C, MAT_USE_INODES, PETSC_FALSE)); 53 54 /* Create MatTransposeColoring from symbolic C=R*A*R^T */ 55 PetscCall(MatColoringCreate(C, &coloring)); 56 PetscCall(MatColoringSetDistance(coloring, 2)); 57 PetscCall(MatColoringSetType(coloring, MATCOLORINGSL)); 58 PetscCall(MatColoringSetFromOptions(coloring)); 59 PetscCall(MatColoringApply(coloring, &iscoloring)); 60 PetscCall(MatColoringDestroy(&coloring)); 61 PetscCall(MatTransposeColoringCreate(C, iscoloring, &matcoloring)); 62 63 rart->matcoloring = matcoloring; 64 PetscCall(ISColoringDestroy(&iscoloring)); 65 66 /* Create Rt_dense */ 67 PetscCall(MatCreate(PETSC_COMM_SELF, &Rt_dense)); 68 PetscCall(MatSetSizes(Rt_dense, A->cmap->n, matcoloring->ncolors, A->cmap->n, matcoloring->ncolors)); 69 PetscCall(MatSetType(Rt_dense, MATSEQDENSE)); 70 PetscCall(MatSeqDenseSetPreallocation(Rt_dense, NULL)); 71 72 Rt_dense->assembled = PETSC_TRUE; 73 rart->Rt = Rt_dense; 74 75 /* Create RARt_dense = R*A*Rt_dense */ 76 PetscCall(MatCreate(PETSC_COMM_SELF, &RARt_dense)); 77 PetscCall(MatSetSizes(RARt_dense, C->rmap->n, matcoloring->ncolors, C->rmap->n, matcoloring->ncolors)); 78 PetscCall(MatSetType(RARt_dense, MATSEQDENSE)); 79 PetscCall(MatSeqDenseSetPreallocation(RARt_dense, NULL)); 80 81 rart->RARt = RARt_dense; 82 83 /* Allocate work array to store columns of A*R^T used in MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense() */ 84 PetscCall(PetscMalloc1(A->rmap->n * 4, &rart->work)); 85 86 /* clean up */ 87 PetscCall(MatDestroy(&P)); 88 89 #if defined(PETSC_USE_INFO) 90 { 91 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 92 PetscReal density = (PetscReal)c->nz / (RARt_dense->rmap->n * RARt_dense->cmap->n); 93 PetscCall(PetscInfo(C, "C=R*(A*Rt) via coloring C - use sparse-dense inner products\n")); 94 PetscCall(PetscInfo(C, "RARt_den %" PetscInt_FMT " %" PetscInt_FMT "; Rt %" PetscInt_FMT " %" PetscInt_FMT " (RARt->nz %" PetscInt_FMT ")/(m*ncolors)=%g\n", RARt_dense->rmap->n, RARt_dense->cmap->n, R->cmap->n, R->rmap->n, c->nz, (double)density)); 95 } 96 #endif 97 PetscFunctionReturn(PETSC_SUCCESS); 98 } 99 100 /* 101 RAB = R * A * B, R and A in seqaij format, B in dense format; 102 */ 103 static PetscErrorCode MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(Mat R, Mat A, Mat B, Mat RAB, PetscScalar *work) 104 { 105 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *r = (Mat_SeqAIJ *)R->data; 106 PetscScalar r1, r2, r3, r4; 107 const PetscScalar *b, *b1, *b2, *b3, *b4; 108 MatScalar *aa, *ra; 109 PetscInt cn = B->cmap->n, bm = B->rmap->n, col, i, j, n, *ai = a->i, *aj, am = A->rmap->n; 110 PetscInt am2 = 2 * am, am3 = 3 * am, bm4 = 4 * bm; 111 PetscScalar *d, *c, *c2, *c3, *c4; 112 PetscInt *rj, rm = R->rmap->n, dm = RAB->rmap->n, dn = RAB->cmap->n; 113 PetscInt rm2 = 2 * rm, rm3 = 3 * rm, colrm; 114 115 PetscFunctionBegin; 116 if (!dm || !dn) PetscFunctionReturn(PETSC_SUCCESS); 117 PetscCheck(bm == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in A %" PetscInt_FMT " not equal rows in B %" PetscInt_FMT, A->cmap->n, bm); 118 PetscCheck(am == R->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in R %" PetscInt_FMT " not equal rows in A %" PetscInt_FMT, R->cmap->n, am); 119 PetscCheck(R->rmap->n == RAB->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows in RAB %" PetscInt_FMT " not equal rows in R %" PetscInt_FMT, RAB->rmap->n, R->rmap->n); 120 PetscCheck(B->cmap->n == RAB->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in RAB %" PetscInt_FMT " not equal columns in B %" PetscInt_FMT, RAB->cmap->n, B->cmap->n); 121 122 { /* 123 This approach is not as good as original ones (will be removed later), but it reveals that 124 AB_den=A*B takes almost all execution time in R*A*B for src/ksp/ksp/tutorials/ex56.c 125 */ 126 PetscBool via_matmatmult = PETSC_FALSE; 127 PetscCall(PetscOptionsGetBool(NULL, NULL, "-matrart_via_matmatmult", &via_matmatmult, NULL)); 128 if (via_matmatmult) { 129 Mat AB_den = NULL; 130 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &AB_den)); 131 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqDense(A, B, 0.0, AB_den)); 132 PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(A, B, AB_den)); 133 PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(R, AB_den, RAB)); 134 PetscCall(MatDestroy(&AB_den)); 135 PetscFunctionReturn(PETSC_SUCCESS); 136 } 137 } 138 139 PetscCall(MatDenseGetArrayRead(B, &b)); 140 PetscCall(MatDenseGetArray(RAB, &d)); 141 b1 = b; 142 b2 = b1 + bm; 143 b3 = b2 + bm; 144 b4 = b3 + bm; 145 c = work; 146 c2 = c + am; 147 c3 = c2 + am; 148 c4 = c3 + am; 149 for (col = 0; col < cn - 4; col += 4) { /* over columns of C */ 150 for (i = 0; i < am; i++) { /* over rows of A in those columns */ 151 r1 = r2 = r3 = r4 = 0.0; 152 n = ai[i + 1] - ai[i]; 153 aj = a->j + ai[i]; 154 aa = a->a + ai[i]; 155 for (j = 0; j < n; j++) { 156 r1 += (*aa) * b1[*aj]; 157 r2 += (*aa) * b2[*aj]; 158 r3 += (*aa) * b3[*aj]; 159 r4 += (*aa++) * b4[*aj++]; 160 } 161 c[i] = r1; 162 c[am + i] = r2; 163 c[am2 + i] = r3; 164 c[am3 + i] = r4; 165 } 166 b1 += bm4; 167 b2 += bm4; 168 b3 += bm4; 169 b4 += bm4; 170 171 /* RAB[:,col] = R*C[:,col] */ 172 colrm = col * rm; 173 for (i = 0; i < rm; i++) { /* over rows of R in those columns */ 174 r1 = r2 = r3 = r4 = 0.0; 175 n = r->i[i + 1] - r->i[i]; 176 rj = r->j + r->i[i]; 177 ra = r->a + r->i[i]; 178 for (j = 0; j < n; j++) { 179 r1 += (*ra) * c[*rj]; 180 r2 += (*ra) * c2[*rj]; 181 r3 += (*ra) * c3[*rj]; 182 r4 += (*ra++) * c4[*rj++]; 183 } 184 d[colrm + i] = r1; 185 d[colrm + rm + i] = r2; 186 d[colrm + rm2 + i] = r3; 187 d[colrm + rm3 + i] = r4; 188 } 189 } 190 for (; col < cn; col++) { /* over extra columns of C */ 191 for (i = 0; i < am; i++) { /* over rows of A in those columns */ 192 r1 = 0.0; 193 n = a->i[i + 1] - a->i[i]; 194 aj = a->j + a->i[i]; 195 aa = a->a + a->i[i]; 196 for (j = 0; j < n; j++) r1 += (*aa++) * b1[*aj++]; 197 c[i] = r1; 198 } 199 b1 += bm; 200 201 for (i = 0; i < rm; i++) { /* over rows of R in those columns */ 202 r1 = 0.0; 203 n = r->i[i + 1] - r->i[i]; 204 rj = r->j + r->i[i]; 205 ra = r->a + r->i[i]; 206 for (j = 0; j < n; j++) r1 += (*ra++) * c[*rj++]; 207 d[col * rm + i] = r1; 208 } 209 } 210 PetscCall(PetscLogFlops(cn * 2.0 * (a->nz + r->nz))); 211 212 PetscCall(MatDenseRestoreArrayRead(B, &b)); 213 PetscCall(MatDenseRestoreArray(RAB, &d)); 214 PetscCall(MatAssemblyBegin(RAB, MAT_FINAL_ASSEMBLY)); 215 PetscCall(MatAssemblyEnd(RAB, MAT_FINAL_ASSEMBLY)); 216 PetscFunctionReturn(PETSC_SUCCESS); 217 } 218 219 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart(Mat A, Mat R, Mat C) 220 { 221 Mat_RARt *rart; 222 MatTransposeColoring matcoloring; 223 Mat Rt, RARt; 224 225 PetscFunctionBegin; 226 MatCheckProduct(C, 3); 227 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 228 rart = (Mat_RARt *)C->product->data; 229 230 /* Get dense Rt by Apply MatTransposeColoring to R */ 231 matcoloring = rart->matcoloring; 232 Rt = rart->Rt; 233 PetscCall(MatTransColoringApplySpToDen(matcoloring, R, Rt)); 234 235 /* Get dense RARt = R*A*Rt -- dominates! */ 236 RARt = rart->RARt; 237 PetscCall(MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(R, A, Rt, RARt, rart->work)); 238 239 /* Recover C from C_dense */ 240 PetscCall(MatTransColoringApplyDenToSp(matcoloring, RARt, C)); 241 PetscFunctionReturn(PETSC_SUCCESS); 242 } 243 244 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(Mat A, Mat R, PetscReal fill, Mat C) 245 { 246 Mat ARt; 247 Mat_RARt *rart; 248 char *alg; 249 250 PetscFunctionBegin; 251 MatCheckProduct(C, 4); 252 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 253 /* create symbolic ARt = A*R^T */ 254 PetscCall(MatProductCreate(A, R, NULL, &ARt)); 255 PetscCall(MatProductSetType(ARt, MATPRODUCT_ABt)); 256 PetscCall(MatProductSetAlgorithm(ARt, "sorted")); 257 PetscCall(MatProductSetFill(ARt, fill)); 258 PetscCall(MatProductSetFromOptions(ARt)); 259 PetscCall(MatProductSymbolic(ARt)); 260 261 /* compute symbolic C = R*ARt */ 262 /* set algorithm for C = R*ARt */ 263 PetscCall(PetscStrallocpy(C->product->alg, &alg)); 264 PetscCall(MatProductSetAlgorithm(C, "sorted")); 265 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ(R, ARt, fill, C)); 266 /* resume original algorithm for C */ 267 PetscCall(MatProductSetAlgorithm(C, alg)); 268 PetscCall(PetscFree(alg)); 269 270 C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult; 271 272 PetscCall(PetscNew(&rart)); 273 rart->ARt = ARt; 274 C->product->data = rart; 275 C->product->destroy = MatDestroy_SeqAIJ_RARt; 276 PetscCall(PetscInfo(C, "Use ARt=A*R^T, C=R*ARt via MatMatTransposeMult(). Coloring can be applied to A*R^T.\n")); 277 PetscFunctionReturn(PETSC_SUCCESS); 278 } 279 280 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult(Mat A, Mat R, Mat C) 281 { 282 Mat_RARt *rart; 283 284 PetscFunctionBegin; 285 MatCheckProduct(C, 3); 286 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 287 rart = (Mat_RARt *)C->product->data; 288 PetscCall(MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(A, R, rart->ARt)); /* dominate! */ 289 PetscCall(MatMatMultNumeric_SeqAIJ_SeqAIJ(R, rart->ARt, C)); 290 PetscFunctionReturn(PETSC_SUCCESS); 291 } 292 293 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ(Mat A, Mat R, PetscReal fill, Mat C) 294 { 295 Mat Rt; 296 Mat_RARt *rart; 297 298 PetscFunctionBegin; 299 MatCheckProduct(C, 4); 300 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 301 PetscCall(MatTranspose(R, MAT_INITIAL_MATRIX, &Rt)); 302 PetscCall(MatMatMatMultSymbolic_SeqAIJ_SeqAIJ_SeqAIJ(R, A, Rt, fill, C)); 303 304 PetscCall(PetscNew(&rart)); 305 rart->data = C->product->data; 306 rart->destroy = C->product->destroy; 307 rart->Rt = Rt; 308 C->product->data = rart; 309 C->product->destroy = MatDestroy_SeqAIJ_RARt; 310 C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ; 311 PetscCall(PetscInfo(C, "Use Rt=R^T and C=R*A*Rt via MatMatMatMult() to avoid sparse inner products\n")); 312 PetscFunctionReturn(PETSC_SUCCESS); 313 } 314 315 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ(Mat A, Mat R, Mat C) 316 { 317 Mat_RARt *rart; 318 319 PetscFunctionBegin; 320 MatCheckProduct(C, 3); 321 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 322 rart = (Mat_RARt *)C->product->data; 323 PetscCall(MatTranspose(R, MAT_REUSE_MATRIX, &rart->Rt)); 324 /* MatMatMatMultSymbolic used a different data */ 325 C->product->data = rart->data; 326 PetscCall(MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqAIJ(R, A, rart->Rt, C)); 327 C->product->data = rart; 328 PetscFunctionReturn(PETSC_SUCCESS); 329 } 330 331 PetscErrorCode MatRARt_SeqAIJ_SeqAIJ(Mat A, Mat R, MatReuse scall, PetscReal fill, Mat *C) 332 { 333 const char *algTypes[3] = {"matmatmatmult", "matmattransposemult", "coloring_rart"}; 334 PetscInt alg = 0; /* set default algorithm */ 335 336 PetscFunctionBegin; 337 if (scall == MAT_INITIAL_MATRIX) { 338 PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "MatRARt", "Mat"); 339 PetscCall(PetscOptionsEList("-matrart_via", "Algorithmic approach", "MatRARt", algTypes, 3, algTypes[0], &alg, NULL)); 340 PetscOptionsEnd(); 341 342 PetscCall(PetscLogEventBegin(MAT_RARtSymbolic, A, R, 0, 0)); 343 PetscCall(MatCreate(PETSC_COMM_SELF, C)); 344 switch (alg) { 345 case 1: 346 /* via matmattransposemult: ARt=A*R^T, C=R*ARt - matrix coloring can be applied to A*R^T */ 347 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A, R, fill, *C)); 348 break; 349 case 2: 350 /* via coloring_rart: apply coloring C = R*A*R^T */ 351 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A, R, fill, *C)); 352 break; 353 default: 354 /* via matmatmatmult: Rt=R^T, C=R*A*Rt - avoid inefficient sparse inner products */ 355 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ(A, R, fill, *C)); 356 break; 357 } 358 PetscCall(PetscLogEventEnd(MAT_RARtSymbolic, A, R, 0, 0)); 359 } 360 361 PetscCall(PetscLogEventBegin(MAT_RARtNumeric, A, R, 0, 0)); 362 PetscCall(((*C)->ops->rartnumeric)(A, R, *C)); 363 PetscCall(PetscLogEventEnd(MAT_RARtNumeric, A, R, 0, 0)); 364 PetscFunctionReturn(PETSC_SUCCESS); 365 } 366 367 PetscErrorCode MatProductSymbolic_RARt_SeqAIJ_SeqAIJ(Mat C) 368 { 369 Mat_Product *product = C->product; 370 Mat A = product->A, R = product->B; 371 MatProductAlgorithm alg = product->alg; 372 PetscReal fill = product->fill; 373 PetscBool flg; 374 375 PetscFunctionBegin; 376 PetscCall(PetscStrcmp(alg, "r*a*rt", &flg)); 377 if (flg) { 378 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ(A, R, fill, C)); 379 goto next; 380 } 381 382 PetscCall(PetscStrcmp(alg, "r*art", &flg)); 383 if (flg) { 384 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A, R, fill, C)); 385 goto next; 386 } 387 388 PetscCall(PetscStrcmp(alg, "coloring_rart", &flg)); 389 if (flg) { 390 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A, R, fill, C)); 391 goto next; 392 } 393 394 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatProductAlgorithm is not supported"); 395 396 next: 397 C->ops->productnumeric = MatProductNumeric_RARt; 398 PetscFunctionReturn(PETSC_SUCCESS); 399 } 400