1 2 /* 3 Defines projective product routines where A is a SeqAIJ matrix 4 C = R * A * R^T 5 */ 6 7 #include <../src/mat/impls/aij/seq/aij.h> 8 #include <../src/mat/utils/freespace.h> 9 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/ 10 11 PetscErrorCode MatDestroy_SeqAIJ_RARt(void *data) 12 { 13 PetscErrorCode ierr; 14 Mat_RARt *rart = (Mat_RARt*)data; 15 16 PetscFunctionBegin; 17 ierr = MatTransposeColoringDestroy(&rart->matcoloring);CHKERRQ(ierr); 18 ierr = MatDestroy(&rart->Rt);CHKERRQ(ierr); 19 ierr = MatDestroy(&rart->RARt);CHKERRQ(ierr); 20 ierr = MatDestroy(&rart->ARt);CHKERRQ(ierr); 21 ierr = PetscFree(rart->work);CHKERRQ(ierr); 22 if (rart->destroy) { 23 ierr = (*rart->destroy)(rart->data);CHKERRQ(ierr); 24 } 25 ierr = PetscFree(rart);CHKERRQ(ierr); 26 PetscFunctionReturn(0); 27 } 28 29 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(Mat A,Mat R,PetscReal fill,Mat C) 30 { 31 PetscErrorCode ierr; 32 Mat P; 33 PetscInt *rti,*rtj; 34 Mat_RARt *rart; 35 MatColoring coloring; 36 MatTransposeColoring matcoloring; 37 ISColoring iscoloring; 38 Mat Rt_dense,RARt_dense; 39 40 PetscFunctionBegin; 41 MatCheckProduct(C,4); 42 if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 43 /* create symbolic P=Rt */ 44 ierr = MatGetSymbolicTranspose_SeqAIJ(R,&rti,&rtj);CHKERRQ(ierr); 45 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,R->cmap->n,R->rmap->n,rti,rtj,NULL,&P);CHKERRQ(ierr); 46 47 /* get symbolic C=Pt*A*P */ 48 ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ_SparseAxpy(A,P,fill,C);CHKERRQ(ierr); 49 ierr = MatSetBlockSizes(C,PetscAbs(R->rmap->bs),PetscAbs(R->rmap->bs));CHKERRQ(ierr); 50 C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart; 51 52 /* create a supporting struct */ 53 ierr = PetscNew(&rart);CHKERRQ(ierr); 54 C->product->data = rart; 55 C->product->destroy = MatDestroy_SeqAIJ_RARt; 56 57 /* ------ Use coloring ---------- */ 58 /* inode causes memory problem */ 59 ierr = MatSetOption(C,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 60 61 /* Create MatTransposeColoring from symbolic C=R*A*R^T */ 62 ierr = MatColoringCreate(C,&coloring);CHKERRQ(ierr); 63 ierr = MatColoringSetDistance(coloring,2);CHKERRQ(ierr); 64 ierr = MatColoringSetType(coloring,MATCOLORINGSL);CHKERRQ(ierr); 65 ierr = MatColoringSetFromOptions(coloring);CHKERRQ(ierr); 66 ierr = MatColoringApply(coloring,&iscoloring);CHKERRQ(ierr); 67 ierr = MatColoringDestroy(&coloring);CHKERRQ(ierr); 68 ierr = MatTransposeColoringCreate(C,iscoloring,&matcoloring);CHKERRQ(ierr); 69 70 rart->matcoloring = matcoloring; 71 ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr); 72 73 /* Create Rt_dense */ 74 ierr = MatCreate(PETSC_COMM_SELF,&Rt_dense);CHKERRQ(ierr); 75 ierr = MatSetSizes(Rt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors);CHKERRQ(ierr); 76 ierr = MatSetType(Rt_dense,MATSEQDENSE);CHKERRQ(ierr); 77 ierr = MatSeqDenseSetPreallocation(Rt_dense,NULL);CHKERRQ(ierr); 78 79 Rt_dense->assembled = PETSC_TRUE; 80 rart->Rt = Rt_dense; 81 82 /* Create RARt_dense = R*A*Rt_dense */ 83 ierr = MatCreate(PETSC_COMM_SELF,&RARt_dense);CHKERRQ(ierr); 84 ierr = MatSetSizes(RARt_dense,C->rmap->n,matcoloring->ncolors,C->rmap->n,matcoloring->ncolors);CHKERRQ(ierr); 85 ierr = MatSetType(RARt_dense,MATSEQDENSE);CHKERRQ(ierr); 86 ierr = MatSeqDenseSetPreallocation(RARt_dense,NULL);CHKERRQ(ierr); 87 88 rart->RARt = RARt_dense; 89 90 /* Allocate work array to store columns of A*R^T used in MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense() */ 91 ierr = PetscMalloc1(A->rmap->n*4,&rart->work);CHKERRQ(ierr); 92 93 /* clean up */ 94 ierr = MatRestoreSymbolicTranspose_SeqAIJ(R,&rti,&rtj);CHKERRQ(ierr); 95 ierr = MatDestroy(&P);CHKERRQ(ierr); 96 97 #if defined(PETSC_USE_INFO) 98 { 99 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 100 PetscReal density = (PetscReal)(c->nz)/(RARt_dense->rmap->n*RARt_dense->cmap->n); 101 ierr = PetscInfo(C,"C=R*(A*Rt) via coloring C - use sparse-dense inner products\n");CHKERRQ(ierr); 102 ierr = PetscInfo6(C,"RARt_den %D %D; Rt %D %D (RARt->nz %D)/(m*ncolors)=%g\n",RARt_dense->rmap->n,RARt_dense->cmap->n,R->cmap->n,R->rmap->n,c->nz,density);CHKERRQ(ierr); 103 } 104 #endif 105 PetscFunctionReturn(0); 106 } 107 108 /* 109 RAB = R * A * B, R and A in seqaij format, B in dense format; 110 */ 111 PetscErrorCode MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(Mat R,Mat A,Mat B,Mat RAB,PetscScalar *work) 112 { 113 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*r=(Mat_SeqAIJ*)R->data; 114 PetscErrorCode ierr; 115 PetscScalar r1,r2,r3,r4; 116 const PetscScalar *b,*b1,*b2,*b3,*b4; 117 MatScalar *aa,*ra; 118 PetscInt cn =B->cmap->n,bm=B->rmap->n,col,i,j,n,*ai=a->i,*aj,am=A->rmap->n; 119 PetscInt am2=2*am,am3=3*am,bm4=4*bm; 120 PetscScalar *d,*c,*c2,*c3,*c4; 121 PetscInt *rj,rm=R->rmap->n,dm=RAB->rmap->n,dn=RAB->cmap->n; 122 PetscInt rm2=2*rm,rm3=3*rm,colrm; 123 124 PetscFunctionBegin; 125 if (!dm || !dn) PetscFunctionReturn(0); 126 if (bm != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %D not equal rows in B %D\n",A->cmap->n,bm); 127 if (am != R->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in R %D not equal rows in A %D\n",R->cmap->n,am); 128 if (R->rmap->n != RAB->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in RAB %D not equal rows in R %D\n",RAB->rmap->n,R->rmap->n); 129 if (B->cmap->n != RAB->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in RAB %D not equal columns in B %D\n",RAB->cmap->n,B->cmap->n); 130 131 { /* 132 This approach is not as good as original ones (will be removed later), but it reveals that 133 AB_den=A*B takes almost all execution time in R*A*B for src/ksp/ksp/tutorials/ex56.c 134 */ 135 PetscBool via_matmatmult=PETSC_FALSE; 136 ierr = PetscOptionsGetBool(NULL,NULL,"-matrart_via_matmatmult",&via_matmatmult,NULL);CHKERRQ(ierr); 137 if (via_matmatmult) { 138 Mat AB_den = NULL; 139 ierr = MatCreate(PetscObjectComm((PetscObject)A),&AB_den);CHKERRQ(ierr); 140 ierr = MatMatMultSymbolic_SeqAIJ_SeqDense(A,B,0.0,AB_den);CHKERRQ(ierr); 141 ierr = MatMatMultNumeric_SeqAIJ_SeqDense(A,B,AB_den);CHKERRQ(ierr); 142 ierr = MatMatMultNumeric_SeqAIJ_SeqDense(R,AB_den,RAB);CHKERRQ(ierr); 143 ierr = MatDestroy(&AB_den);CHKERRQ(ierr); 144 PetscFunctionReturn(0); 145 } 146 } 147 148 ierr = MatDenseGetArrayRead(B,&b);CHKERRQ(ierr); 149 ierr = MatDenseGetArray(RAB,&d);CHKERRQ(ierr); 150 b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; 151 c = work; c2 = c + am; c3 = c2 + am; c4 = c3 + am; 152 for (col=0; col<cn-4; col += 4) { /* over columns of C */ 153 for (i=0; i<am; i++) { /* over rows of A in those columns */ 154 r1 = r2 = r3 = r4 = 0.0; 155 n = ai[i+1] - ai[i]; 156 aj = a->j + ai[i]; 157 aa = a->a + ai[i]; 158 for (j=0; j<n; j++) { 159 r1 += (*aa)*b1[*aj]; 160 r2 += (*aa)*b2[*aj]; 161 r3 += (*aa)*b3[*aj]; 162 r4 += (*aa++)*b4[*aj++]; 163 } 164 c[i] = r1; 165 c[am + i] = r2; 166 c[am2 + i] = r3; 167 c[am3 + i] = r4; 168 } 169 b1 += bm4; 170 b2 += bm4; 171 b3 += bm4; 172 b4 += bm4; 173 174 /* RAB[:,col] = R*C[:,col] */ 175 colrm = col*rm; 176 for (i=0; i<rm; i++) { /* over rows of R in those columns */ 177 r1 = r2 = r3 = r4 = 0.0; 178 n = r->i[i+1] - r->i[i]; 179 rj = r->j + r->i[i]; 180 ra = r->a + r->i[i]; 181 for (j=0; j<n; j++) { 182 r1 += (*ra)*c[*rj]; 183 r2 += (*ra)*c2[*rj]; 184 r3 += (*ra)*c3[*rj]; 185 r4 += (*ra++)*c4[*rj++]; 186 } 187 d[colrm + i] = r1; 188 d[colrm + rm + i] = r2; 189 d[colrm + rm2 + i] = r3; 190 d[colrm + rm3 + i] = r4; 191 } 192 } 193 for (; col<cn; col++) { /* over extra columns of C */ 194 for (i=0; i<am; i++) { /* over rows of A in those columns */ 195 r1 = 0.0; 196 n = a->i[i+1] - a->i[i]; 197 aj = a->j + a->i[i]; 198 aa = a->a + a->i[i]; 199 for (j=0; j<n; j++) { 200 r1 += (*aa++)*b1[*aj++]; 201 } 202 c[i] = r1; 203 } 204 b1 += bm; 205 206 for (i=0; i<rm; i++) { /* over rows of R in those columns */ 207 r1 = 0.0; 208 n = r->i[i+1] - r->i[i]; 209 rj = r->j + r->i[i]; 210 ra = r->a + r->i[i]; 211 for (j=0; j<n; j++) { 212 r1 += (*ra++)*c[*rj++]; 213 } 214 d[col*rm + i] = r1; 215 } 216 } 217 ierr = PetscLogFlops(cn*2.0*(a->nz + r->nz));CHKERRQ(ierr); 218 219 ierr = MatDenseRestoreArrayRead(B,&b);CHKERRQ(ierr); 220 ierr = MatDenseRestoreArray(RAB,&d);CHKERRQ(ierr); 221 ierr = MatAssemblyBegin(RAB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 222 ierr = MatAssemblyEnd(RAB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 223 PetscFunctionReturn(0); 224 } 225 226 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart(Mat A,Mat R,Mat C) 227 { 228 PetscErrorCode ierr; 229 Mat_RARt *rart; 230 MatTransposeColoring matcoloring; 231 Mat Rt,RARt; 232 233 PetscFunctionBegin; 234 MatCheckProduct(C,3); 235 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 236 rart = (Mat_RARt*)C->product->data; 237 238 /* Get dense Rt by Apply MatTransposeColoring to R */ 239 matcoloring = rart->matcoloring; 240 Rt = rart->Rt; 241 ierr = MatTransColoringApplySpToDen(matcoloring,R,Rt);CHKERRQ(ierr); 242 243 /* Get dense RARt = R*A*Rt -- dominates! */ 244 RARt = rart->RARt; 245 ierr = MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(R,A,Rt,RARt,rart->work);CHKERRQ(ierr); 246 247 /* Recover C from C_dense */ 248 ierr = MatTransColoringApplyDenToSp(matcoloring,RARt,C);CHKERRQ(ierr); 249 PetscFunctionReturn(0); 250 } 251 252 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(Mat A,Mat R,PetscReal fill,Mat C) 253 { 254 PetscErrorCode ierr; 255 Mat ARt; 256 Mat_RARt *rart; 257 char *alg; 258 259 PetscFunctionBegin; 260 MatCheckProduct(C,4); 261 if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 262 /* create symbolic ARt = A*R^T */ 263 ierr = MatProductCreate(A,R,NULL,&ARt);CHKERRQ(ierr); 264 ierr = MatProductSetType(ARt,MATPRODUCT_ABt);CHKERRQ(ierr); 265 ierr = MatProductSetAlgorithm(ARt,"sorted");CHKERRQ(ierr); 266 ierr = MatProductSetFill(ARt,fill);CHKERRQ(ierr); 267 ierr = MatProductSetFromOptions(ARt);CHKERRQ(ierr); 268 ierr = MatProductSymbolic(ARt);CHKERRQ(ierr); 269 270 /* compute symbolic C = R*ARt */ 271 /* set algorithm for C = R*ARt */ 272 ierr = PetscStrallocpy(C->product->alg,&alg);CHKERRQ(ierr); 273 ierr = MatProductSetAlgorithm(C,"sorted");CHKERRQ(ierr); 274 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(R,ARt,fill,C);CHKERRQ(ierr); 275 /* resume original algorithm for C */ 276 ierr = MatProductSetAlgorithm(C,alg);CHKERRQ(ierr); 277 ierr = PetscFree(alg);CHKERRQ(ierr); 278 279 C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult; 280 281 ierr = PetscNew(&rart);CHKERRQ(ierr); 282 rart->ARt = ARt; 283 C->product->data = rart; 284 C->product->destroy = MatDestroy_SeqAIJ_RARt; 285 ierr = PetscInfo(C,"Use ARt=A*R^T, C=R*ARt via MatMatTransposeMult(). Coloring can be applied to A*R^T.\n");CHKERRQ(ierr); 286 PetscFunctionReturn(0); 287 } 288 289 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult(Mat A,Mat R,Mat C) 290 { 291 PetscErrorCode ierr; 292 Mat_RARt *rart; 293 294 PetscFunctionBegin; 295 MatCheckProduct(C,3); 296 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 297 rart = (Mat_RARt*)C->product->data; 298 ierr = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(A,R,rart->ARt);CHKERRQ(ierr); /* dominate! */ 299 ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ(R,rart->ARt,C);CHKERRQ(ierr); 300 PetscFunctionReturn(0); 301 } 302 303 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat R,PetscReal fill,Mat C) 304 { 305 PetscErrorCode ierr; 306 Mat Rt; 307 Mat_RARt *rart; 308 309 PetscFunctionBegin; 310 MatCheckProduct(C,4); 311 if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 312 ierr = MatTranspose_SeqAIJ(R,MAT_INITIAL_MATRIX,&Rt);CHKERRQ(ierr); 313 ierr = MatMatMatMultSymbolic_SeqAIJ_SeqAIJ_SeqAIJ(R,A,Rt,fill,C);CHKERRQ(ierr); 314 315 ierr = PetscNew(&rart);CHKERRQ(ierr); 316 rart->data = C->product->data; 317 rart->destroy = C->product->destroy; 318 rart->Rt = Rt; 319 C->product->data = rart; 320 C->product->destroy = MatDestroy_SeqAIJ_RARt; 321 C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ; 322 ierr = PetscInfo(C,"Use Rt=R^T and C=R*A*Rt via MatMatMatMult() to avoid sparse inner products\n");CHKERRQ(ierr); 323 PetscFunctionReturn(0); 324 } 325 326 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ(Mat A,Mat R,Mat C) 327 { 328 PetscErrorCode ierr; 329 Mat_RARt *rart; 330 331 PetscFunctionBegin; 332 MatCheckProduct(C,3); 333 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 334 rart = (Mat_RARt*)C->product->data; 335 ierr = MatTranspose_SeqAIJ(R,MAT_REUSE_MATRIX,&rart->Rt);CHKERRQ(ierr); 336 /* MatMatMatMultSymbolic used a different data */ 337 C->product->data = rart->data; 338 ierr = MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqAIJ(R,A,rart->Rt,C);CHKERRQ(ierr); 339 C->product->data = rart; 340 PetscFunctionReturn(0); 341 } 342 343 PetscErrorCode MatRARt_SeqAIJ_SeqAIJ(Mat A,Mat R,MatReuse scall,PetscReal fill,Mat *C) 344 { 345 PetscErrorCode ierr; 346 const char *algTypes[3] = {"matmatmatmult","matmattransposemult","coloring_rart"}; 347 PetscInt alg=0; /* set default algorithm */ 348 349 PetscFunctionBegin; 350 if (scall == MAT_INITIAL_MATRIX) { 351 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MatRARt","Mat");CHKERRQ(ierr); 352 ierr = PetscOptionsEList("-matrart_via","Algorithmic approach","MatRARt",algTypes,3,algTypes[0],&alg,NULL);CHKERRQ(ierr); 353 ierr = PetscOptionsEnd();CHKERRQ(ierr); 354 355 ierr = PetscLogEventBegin(MAT_RARtSymbolic,A,R,0,0);CHKERRQ(ierr); 356 ierr = MatCreate(PETSC_COMM_SELF,C);CHKERRQ(ierr); 357 switch (alg) { 358 case 1: 359 /* via matmattransposemult: ARt=A*R^T, C=R*ARt - matrix coloring can be applied to A*R^T */ 360 ierr = MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A,R,fill,*C);CHKERRQ(ierr); 361 break; 362 case 2: 363 /* via coloring_rart: apply coloring C = R*A*R^T */ 364 ierr = MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A,R,fill,*C);CHKERRQ(ierr); 365 break; 366 default: 367 /* via matmatmatmult: Rt=R^T, C=R*A*Rt - avoid inefficient sparse inner products */ 368 ierr = MatRARtSymbolic_SeqAIJ_SeqAIJ(A,R,fill,*C);CHKERRQ(ierr); 369 break; 370 } 371 ierr = PetscLogEventEnd(MAT_RARtSymbolic,A,R,0,0);CHKERRQ(ierr); 372 } 373 374 ierr = PetscLogEventBegin(MAT_RARtNumeric,A,R,0,0);CHKERRQ(ierr); 375 ierr = ((*C)->ops->rartnumeric)(A,R,*C);CHKERRQ(ierr); 376 ierr = PetscLogEventEnd(MAT_RARtNumeric,A,R,0,0);CHKERRQ(ierr); 377 PetscFunctionReturn(0); 378 } 379 380 /* ------------------------------------------------------------- */ 381 PetscErrorCode MatProductSymbolic_RARt_SeqAIJ_SeqAIJ(Mat C) 382 { 383 PetscErrorCode ierr; 384 Mat_Product *product = C->product; 385 Mat A=product->A,R=product->B; 386 MatProductAlgorithm alg=product->alg; 387 PetscReal fill=product->fill; 388 PetscBool flg; 389 390 PetscFunctionBegin; 391 ierr = PetscStrcmp(alg,"r*a*rt",&flg);CHKERRQ(ierr); 392 if (flg) { 393 ierr = MatRARtSymbolic_SeqAIJ_SeqAIJ(A,R,fill,C);CHKERRQ(ierr); 394 goto next; 395 } 396 397 ierr = PetscStrcmp(alg,"r*art",&flg);CHKERRQ(ierr); 398 if (flg) { 399 ierr = MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A,R,fill,C);CHKERRQ(ierr); 400 goto next; 401 } 402 403 ierr = PetscStrcmp(alg,"coloring_rart",&flg);CHKERRQ(ierr); 404 if (flg) { 405 ierr = MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A,R,fill,C);CHKERRQ(ierr); 406 goto next; 407 } 408 409 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatProductAlgorithm is not supported"); 410 411 next: 412 C->ops->productnumeric = MatProductNumeric_RARt; 413 PetscFunctionReturn(0); 414 } 415