1 2 /* 3 Defines projective product routines where A is a SeqAIJ matrix 4 C = R * A * R^T 5 */ 6 7 #include <../src/mat/impls/aij/seq/aij.h> 8 #include <../src/mat/utils/freespace.h> 9 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/ 10 11 PetscErrorCode MatDestroy_SeqAIJ_RARt(void *data) { 12 Mat_RARt *rart = (Mat_RARt *)data; 13 14 PetscFunctionBegin; 15 PetscCall(MatTransposeColoringDestroy(&rart->matcoloring)); 16 PetscCall(MatDestroy(&rart->Rt)); 17 PetscCall(MatDestroy(&rart->RARt)); 18 PetscCall(MatDestroy(&rart->ARt)); 19 PetscCall(PetscFree(rart->work)); 20 if (rart->destroy) PetscCall((*rart->destroy)(rart->data)); 21 PetscCall(PetscFree(rart)); 22 PetscFunctionReturn(0); 23 } 24 25 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(Mat A, Mat R, PetscReal fill, Mat C) { 26 Mat P; 27 Mat_RARt *rart; 28 MatColoring coloring; 29 MatTransposeColoring matcoloring; 30 ISColoring iscoloring; 31 Mat Rt_dense, RARt_dense; 32 33 PetscFunctionBegin; 34 MatCheckProduct(C, 4); 35 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 36 /* create symbolic P=Rt */ 37 PetscCall(MatTransposeSymbolic(R, &P)); 38 39 /* get symbolic C=Pt*A*P */ 40 PetscCall(MatPtAPSymbolic_SeqAIJ_SeqAIJ_SparseAxpy(A, P, fill, C)); 41 PetscCall(MatSetBlockSizes(C, PetscAbs(R->rmap->bs), PetscAbs(R->rmap->bs))); 42 C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart; 43 44 /* create a supporting struct */ 45 PetscCall(PetscNew(&rart)); 46 C->product->data = rart; 47 C->product->destroy = MatDestroy_SeqAIJ_RARt; 48 49 /* ------ Use coloring ---------- */ 50 /* inode causes memory problem */ 51 PetscCall(MatSetOption(C, MAT_USE_INODES, PETSC_FALSE)); 52 53 /* Create MatTransposeColoring from symbolic C=R*A*R^T */ 54 PetscCall(MatColoringCreate(C, &coloring)); 55 PetscCall(MatColoringSetDistance(coloring, 2)); 56 PetscCall(MatColoringSetType(coloring, MATCOLORINGSL)); 57 PetscCall(MatColoringSetFromOptions(coloring)); 58 PetscCall(MatColoringApply(coloring, &iscoloring)); 59 PetscCall(MatColoringDestroy(&coloring)); 60 PetscCall(MatTransposeColoringCreate(C, iscoloring, &matcoloring)); 61 62 rart->matcoloring = matcoloring; 63 PetscCall(ISColoringDestroy(&iscoloring)); 64 65 /* Create Rt_dense */ 66 PetscCall(MatCreate(PETSC_COMM_SELF, &Rt_dense)); 67 PetscCall(MatSetSizes(Rt_dense, A->cmap->n, matcoloring->ncolors, A->cmap->n, matcoloring->ncolors)); 68 PetscCall(MatSetType(Rt_dense, MATSEQDENSE)); 69 PetscCall(MatSeqDenseSetPreallocation(Rt_dense, NULL)); 70 71 Rt_dense->assembled = PETSC_TRUE; 72 rart->Rt = Rt_dense; 73 74 /* Create RARt_dense = R*A*Rt_dense */ 75 PetscCall(MatCreate(PETSC_COMM_SELF, &RARt_dense)); 76 PetscCall(MatSetSizes(RARt_dense, C->rmap->n, matcoloring->ncolors, C->rmap->n, matcoloring->ncolors)); 77 PetscCall(MatSetType(RARt_dense, MATSEQDENSE)); 78 PetscCall(MatSeqDenseSetPreallocation(RARt_dense, NULL)); 79 80 rart->RARt = RARt_dense; 81 82 /* Allocate work array to store columns of A*R^T used in MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense() */ 83 PetscCall(PetscMalloc1(A->rmap->n * 4, &rart->work)); 84 85 /* clean up */ 86 PetscCall(MatDestroy(&P)); 87 88 #if defined(PETSC_USE_INFO) 89 { 90 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 91 PetscReal density = (PetscReal)(c->nz) / (RARt_dense->rmap->n * RARt_dense->cmap->n); 92 PetscCall(PetscInfo(C, "C=R*(A*Rt) via coloring C - use sparse-dense inner products\n")); 93 PetscCall(PetscInfo(C, "RARt_den %" PetscInt_FMT " %" PetscInt_FMT "; Rt %" PetscInt_FMT " %" PetscInt_FMT " (RARt->nz %" PetscInt_FMT ")/(m*ncolors)=%g\n", RARt_dense->rmap->n, RARt_dense->cmap->n, R->cmap->n, R->rmap->n, c->nz, (double)density)); 94 } 95 #endif 96 PetscFunctionReturn(0); 97 } 98 99 /* 100 RAB = R * A * B, R and A in seqaij format, B in dense format; 101 */ 102 PetscErrorCode MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(Mat R, Mat A, Mat B, Mat RAB, PetscScalar *work) { 103 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *r = (Mat_SeqAIJ *)R->data; 104 PetscScalar r1, r2, r3, r4; 105 const PetscScalar *b, *b1, *b2, *b3, *b4; 106 MatScalar *aa, *ra; 107 PetscInt cn = B->cmap->n, bm = B->rmap->n, col, i, j, n, *ai = a->i, *aj, am = A->rmap->n; 108 PetscInt am2 = 2 * am, am3 = 3 * am, bm4 = 4 * bm; 109 PetscScalar *d, *c, *c2, *c3, *c4; 110 PetscInt *rj, rm = R->rmap->n, dm = RAB->rmap->n, dn = RAB->cmap->n; 111 PetscInt rm2 = 2 * rm, rm3 = 3 * rm, colrm; 112 113 PetscFunctionBegin; 114 if (!dm || !dn) PetscFunctionReturn(0); 115 PetscCheck(bm == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in A %" PetscInt_FMT " not equal rows in B %" PetscInt_FMT, A->cmap->n, bm); 116 PetscCheck(am == R->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in R %" PetscInt_FMT " not equal rows in A %" PetscInt_FMT, R->cmap->n, am); 117 PetscCheck(R->rmap->n == RAB->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows in RAB %" PetscInt_FMT " not equal rows in R %" PetscInt_FMT, RAB->rmap->n, R->rmap->n); 118 PetscCheck(B->cmap->n == RAB->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in RAB %" PetscInt_FMT " not equal columns in B %" PetscInt_FMT, RAB->cmap->n, B->cmap->n); 119 120 { /* 121 This approach is not as good as original ones (will be removed later), but it reveals that 122 AB_den=A*B takes almost all execution time in R*A*B for src/ksp/ksp/tutorials/ex56.c 123 */ 124 PetscBool via_matmatmult = PETSC_FALSE; 125 PetscCall(PetscOptionsGetBool(NULL, NULL, "-matrart_via_matmatmult", &via_matmatmult, NULL)); 126 if (via_matmatmult) { 127 Mat AB_den = NULL; 128 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &AB_den)); 129 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqDense(A, B, 0.0, AB_den)); 130 PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(A, B, AB_den)); 131 PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(R, AB_den, RAB)); 132 PetscCall(MatDestroy(&AB_den)); 133 PetscFunctionReturn(0); 134 } 135 } 136 137 PetscCall(MatDenseGetArrayRead(B, &b)); 138 PetscCall(MatDenseGetArray(RAB, &d)); 139 b1 = b; 140 b2 = b1 + bm; 141 b3 = b2 + bm; 142 b4 = b3 + bm; 143 c = work; 144 c2 = c + am; 145 c3 = c2 + am; 146 c4 = c3 + am; 147 for (col = 0; col < cn - 4; col += 4) { /* over columns of C */ 148 for (i = 0; i < am; i++) { /* over rows of A in those columns */ 149 r1 = r2 = r3 = r4 = 0.0; 150 n = ai[i + 1] - ai[i]; 151 aj = a->j + ai[i]; 152 aa = a->a + ai[i]; 153 for (j = 0; j < n; j++) { 154 r1 += (*aa) * b1[*aj]; 155 r2 += (*aa) * b2[*aj]; 156 r3 += (*aa) * b3[*aj]; 157 r4 += (*aa++) * b4[*aj++]; 158 } 159 c[i] = r1; 160 c[am + i] = r2; 161 c[am2 + i] = r3; 162 c[am3 + i] = r4; 163 } 164 b1 += bm4; 165 b2 += bm4; 166 b3 += bm4; 167 b4 += bm4; 168 169 /* RAB[:,col] = R*C[:,col] */ 170 colrm = col * rm; 171 for (i = 0; i < rm; i++) { /* over rows of R in those columns */ 172 r1 = r2 = r3 = r4 = 0.0; 173 n = r->i[i + 1] - r->i[i]; 174 rj = r->j + r->i[i]; 175 ra = r->a + r->i[i]; 176 for (j = 0; j < n; j++) { 177 r1 += (*ra) * c[*rj]; 178 r2 += (*ra) * c2[*rj]; 179 r3 += (*ra) * c3[*rj]; 180 r4 += (*ra++) * c4[*rj++]; 181 } 182 d[colrm + i] = r1; 183 d[colrm + rm + i] = r2; 184 d[colrm + rm2 + i] = r3; 185 d[colrm + rm3 + i] = r4; 186 } 187 } 188 for (; col < cn; col++) { /* over extra columns of C */ 189 for (i = 0; i < am; i++) { /* over rows of A in those columns */ 190 r1 = 0.0; 191 n = a->i[i + 1] - a->i[i]; 192 aj = a->j + a->i[i]; 193 aa = a->a + a->i[i]; 194 for (j = 0; j < n; j++) { r1 += (*aa++) * b1[*aj++]; } 195 c[i] = r1; 196 } 197 b1 += bm; 198 199 for (i = 0; i < rm; i++) { /* over rows of R in those columns */ 200 r1 = 0.0; 201 n = r->i[i + 1] - r->i[i]; 202 rj = r->j + r->i[i]; 203 ra = r->a + r->i[i]; 204 for (j = 0; j < n; j++) { r1 += (*ra++) * c[*rj++]; } 205 d[col * rm + i] = r1; 206 } 207 } 208 PetscCall(PetscLogFlops(cn * 2.0 * (a->nz + r->nz))); 209 210 PetscCall(MatDenseRestoreArrayRead(B, &b)); 211 PetscCall(MatDenseRestoreArray(RAB, &d)); 212 PetscCall(MatAssemblyBegin(RAB, MAT_FINAL_ASSEMBLY)); 213 PetscCall(MatAssemblyEnd(RAB, MAT_FINAL_ASSEMBLY)); 214 PetscFunctionReturn(0); 215 } 216 217 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart(Mat A, Mat R, Mat C) { 218 Mat_RARt *rart; 219 MatTransposeColoring matcoloring; 220 Mat Rt, RARt; 221 222 PetscFunctionBegin; 223 MatCheckProduct(C, 3); 224 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 225 rart = (Mat_RARt *)C->product->data; 226 227 /* Get dense Rt by Apply MatTransposeColoring to R */ 228 matcoloring = rart->matcoloring; 229 Rt = rart->Rt; 230 PetscCall(MatTransColoringApplySpToDen(matcoloring, R, Rt)); 231 232 /* Get dense RARt = R*A*Rt -- dominates! */ 233 RARt = rart->RARt; 234 PetscCall(MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(R, A, Rt, RARt, rart->work)); 235 236 /* Recover C from C_dense */ 237 PetscCall(MatTransColoringApplyDenToSp(matcoloring, RARt, C)); 238 PetscFunctionReturn(0); 239 } 240 241 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(Mat A, Mat R, PetscReal fill, Mat C) { 242 Mat ARt; 243 Mat_RARt *rart; 244 char *alg; 245 246 PetscFunctionBegin; 247 MatCheckProduct(C, 4); 248 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 249 /* create symbolic ARt = A*R^T */ 250 PetscCall(MatProductCreate(A, R, NULL, &ARt)); 251 PetscCall(MatProductSetType(ARt, MATPRODUCT_ABt)); 252 PetscCall(MatProductSetAlgorithm(ARt, "sorted")); 253 PetscCall(MatProductSetFill(ARt, fill)); 254 PetscCall(MatProductSetFromOptions(ARt)); 255 PetscCall(MatProductSymbolic(ARt)); 256 257 /* compute symbolic C = R*ARt */ 258 /* set algorithm for C = R*ARt */ 259 PetscCall(PetscStrallocpy(C->product->alg, &alg)); 260 PetscCall(MatProductSetAlgorithm(C, "sorted")); 261 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ(R, ARt, fill, C)); 262 /* resume original algorithm for C */ 263 PetscCall(MatProductSetAlgorithm(C, alg)); 264 PetscCall(PetscFree(alg)); 265 266 C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult; 267 268 PetscCall(PetscNew(&rart)); 269 rart->ARt = ARt; 270 C->product->data = rart; 271 C->product->destroy = MatDestroy_SeqAIJ_RARt; 272 PetscCall(PetscInfo(C, "Use ARt=A*R^T, C=R*ARt via MatMatTransposeMult(). Coloring can be applied to A*R^T.\n")); 273 PetscFunctionReturn(0); 274 } 275 276 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult(Mat A, Mat R, Mat C) { 277 Mat_RARt *rart; 278 279 PetscFunctionBegin; 280 MatCheckProduct(C, 3); 281 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 282 rart = (Mat_RARt *)C->product->data; 283 PetscCall(MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(A, R, rart->ARt)); /* dominate! */ 284 PetscCall(MatMatMultNumeric_SeqAIJ_SeqAIJ(R, rart->ARt, C)); 285 PetscFunctionReturn(0); 286 } 287 288 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ(Mat A, Mat R, PetscReal fill, Mat C) { 289 Mat Rt; 290 Mat_RARt *rart; 291 292 PetscFunctionBegin; 293 MatCheckProduct(C, 4); 294 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 295 PetscCall(MatTranspose(R, MAT_INITIAL_MATRIX, &Rt)); 296 PetscCall(MatMatMatMultSymbolic_SeqAIJ_SeqAIJ_SeqAIJ(R, A, Rt, fill, C)); 297 298 PetscCall(PetscNew(&rart)); 299 rart->data = C->product->data; 300 rart->destroy = C->product->destroy; 301 rart->Rt = Rt; 302 C->product->data = rart; 303 C->product->destroy = MatDestroy_SeqAIJ_RARt; 304 C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ; 305 PetscCall(PetscInfo(C, "Use Rt=R^T and C=R*A*Rt via MatMatMatMult() to avoid sparse inner products\n")); 306 PetscFunctionReturn(0); 307 } 308 309 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ(Mat A, Mat R, Mat C) { 310 Mat_RARt *rart; 311 312 PetscFunctionBegin; 313 MatCheckProduct(C, 3); 314 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 315 rart = (Mat_RARt *)C->product->data; 316 PetscCall(MatTranspose(R, MAT_REUSE_MATRIX, &rart->Rt)); 317 /* MatMatMatMultSymbolic used a different data */ 318 C->product->data = rart->data; 319 PetscCall(MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqAIJ(R, A, rart->Rt, C)); 320 C->product->data = rart; 321 PetscFunctionReturn(0); 322 } 323 324 PetscErrorCode MatRARt_SeqAIJ_SeqAIJ(Mat A, Mat R, MatReuse scall, PetscReal fill, Mat *C) { 325 const char *algTypes[3] = {"matmatmatmult", "matmattransposemult", "coloring_rart"}; 326 PetscInt alg = 0; /* set default algorithm */ 327 328 PetscFunctionBegin; 329 if (scall == MAT_INITIAL_MATRIX) { 330 PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "MatRARt", "Mat"); 331 PetscCall(PetscOptionsEList("-matrart_via", "Algorithmic approach", "MatRARt", algTypes, 3, algTypes[0], &alg, NULL)); 332 PetscOptionsEnd(); 333 334 PetscCall(PetscLogEventBegin(MAT_RARtSymbolic, A, R, 0, 0)); 335 PetscCall(MatCreate(PETSC_COMM_SELF, C)); 336 switch (alg) { 337 case 1: 338 /* via matmattransposemult: ARt=A*R^T, C=R*ARt - matrix coloring can be applied to A*R^T */ 339 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A, R, fill, *C)); 340 break; 341 case 2: 342 /* via coloring_rart: apply coloring C = R*A*R^T */ 343 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A, R, fill, *C)); 344 break; 345 default: 346 /* via matmatmatmult: Rt=R^T, C=R*A*Rt - avoid inefficient sparse inner products */ 347 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ(A, R, fill, *C)); 348 break; 349 } 350 PetscCall(PetscLogEventEnd(MAT_RARtSymbolic, A, R, 0, 0)); 351 } 352 353 PetscCall(PetscLogEventBegin(MAT_RARtNumeric, A, R, 0, 0)); 354 PetscCall(((*C)->ops->rartnumeric)(A, R, *C)); 355 PetscCall(PetscLogEventEnd(MAT_RARtNumeric, A, R, 0, 0)); 356 PetscFunctionReturn(0); 357 } 358 359 /* ------------------------------------------------------------- */ 360 PetscErrorCode MatProductSymbolic_RARt_SeqAIJ_SeqAIJ(Mat C) { 361 Mat_Product *product = C->product; 362 Mat A = product->A, R = product->B; 363 MatProductAlgorithm alg = product->alg; 364 PetscReal fill = product->fill; 365 PetscBool flg; 366 367 PetscFunctionBegin; 368 PetscCall(PetscStrcmp(alg, "r*a*rt", &flg)); 369 if (flg) { 370 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ(A, R, fill, C)); 371 goto next; 372 } 373 374 PetscCall(PetscStrcmp(alg, "r*art", &flg)); 375 if (flg) { 376 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A, R, fill, C)); 377 goto next; 378 } 379 380 PetscCall(PetscStrcmp(alg, "coloring_rart", &flg)); 381 if (flg) { 382 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A, R, fill, C)); 383 goto next; 384 } 385 386 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatProductAlgorithm is not supported"); 387 388 next: 389 C->ops->productnumeric = MatProductNumeric_RARt; 390 PetscFunctionReturn(0); 391 } 392