#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
.  -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes:
    Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also automatically switches over to use inodes when
    enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
.  -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) {
    ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
  }
  if (a->B) {
    ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
  }

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix.
*/ 66 if (a->lvec) { 67 ierr = VecBindToCPU(a->lvec,flg);CHKERRQ(ierr); 68 } 69 if (a->diag) { 70 ierr = VecBindToCPU(a->diag,flg);CHKERRQ(ierr); 71 } 72 73 PetscFunctionReturn(0); 74 } 75 76 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 77 { 78 PetscErrorCode ierr; 79 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 80 81 PetscFunctionBegin; 82 if (mat->A) { 83 ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr); 84 ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr); 85 } 86 PetscFunctionReturn(0); 87 } 88 89 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 90 { 91 PetscErrorCode ierr; 92 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 93 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 94 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 95 const PetscInt *ia,*ib; 96 const MatScalar *aa,*bb,*aav,*bav; 97 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 98 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 99 100 PetscFunctionBegin; 101 *keptrows = NULL; 102 103 ia = a->i; 104 ib = b->i; 105 ierr = MatSeqAIJGetArrayRead(mat->A,&aav);CHKERRQ(ierr); 106 ierr = MatSeqAIJGetArrayRead(mat->B,&bav);CHKERRQ(ierr); 107 for (i=0; i<m; i++) { 108 na = ia[i+1] - ia[i]; 109 nb = ib[i+1] - ib[i]; 110 if (!na && !nb) { 111 cnt++; 112 goto ok1; 113 } 114 aa = aav + ia[i]; 115 for (j=0; j<na; j++) { 116 if (aa[j] != 0.0) goto ok1; 117 } 118 bb = bav + ib[i]; 119 for (j=0; j <nb; j++) { 120 if (bb[j] != 0.0) goto ok1; 121 } 122 cnt++; 123 ok1:; 124 } 125 ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRMPI(ierr); 126 if (!n0rows) { 127 ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr); 128 ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 132 cnt = 0; 133 for (i=0; i<m; i++) { 134 na = ia[i+1] - ia[i]; 135 nb = ib[i+1] - ib[i]; 136 if (!na && !nb) continue; 137 aa = aav + ia[i]; 138 for (j=0; j<na;j++) { 139 if (aa[j] != 0.0) { 140 rows[cnt++] = rstart + i; 141 goto ok2; 142 } 143 } 144 bb = bav + ib[i]; 145 for (j=0; j<nb; j++) { 146 if (bb[j] != 0.0) { 147 rows[cnt++] = rstart + i; 148 goto ok2; 149 } 150 } 151 ok2:; 152 } 153 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 154 ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr); 155 ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr); 156 PetscFunctionReturn(0); 157 } 158 159 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 160 { 161 PetscErrorCode ierr; 162 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 163 PetscBool cong; 164 165 PetscFunctionBegin; 166 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 167 if (Y->assembled && cong) { 168 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 169 } else { 170 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 171 } 172 PetscFunctionReturn(0); 173 } 174 175 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 176 { 177 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 178 PetscErrorCode ierr; 179 PetscInt i,rstart,nrows,*rows; 180 181 PetscFunctionBegin; 182 *zrows = NULL; 183 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 184 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 185 for (i=0; i<nrows; i++) rows[i] += rstart; 186 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 187 PetscFunctionReturn(0); 188 } 189 190 PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat 
A,PetscInt type,PetscReal *reductions) 191 { 192 PetscErrorCode ierr; 193 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 194 PetscInt i,m,n,*garray = aij->garray; 195 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 196 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 197 PetscReal *work; 198 const PetscScalar *dummy; 199 200 PetscFunctionBegin; 201 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 202 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 203 ierr = MatSeqAIJGetArrayRead(aij->A,&dummy);CHKERRQ(ierr); 204 ierr = MatSeqAIJRestoreArrayRead(aij->A,&dummy);CHKERRQ(ierr); 205 ierr = MatSeqAIJGetArrayRead(aij->B,&dummy);CHKERRQ(ierr); 206 ierr = MatSeqAIJRestoreArrayRead(aij->B,&dummy);CHKERRQ(ierr); 207 if (type == NORM_2) { 208 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 209 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 210 } 211 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 212 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 213 } 214 } else if (type == NORM_1) { 215 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 216 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 217 } 218 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 219 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 220 } 221 } else if (type == NORM_INFINITY) { 222 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 223 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 224 } 225 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 226 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 227 } 228 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 229 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 230 work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 231 } 232 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 233 work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 234 } 235 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 236 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 237 work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 238 } 239 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 240 work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 241 } 242 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type"); 243 if (type == NORM_INFINITY) { 244 ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 245 } else { 246 ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 247 } 248 ierr = PetscFree(work);CHKERRQ(ierr); 249 if (type == NORM_2) { 250 for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 251 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 252 for (i=0; i<n; i++) reductions[i] /= m; 253 } 254 PetscFunctionReturn(0); 255 } 256 257 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 258 { 259 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 260 IS sis,gis; 261 PetscErrorCode ierr; 262 const PetscInt *isis,*igis; 263 PetscInt n,*iis,nsis,ngis,rstart,i; 264 265 PetscFunctionBegin; 266 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 267 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 268 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 269 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 270 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 271 ierr = 
ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each process
  has an order-N integer array) but it is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (n && !aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure whether PetscLogFlops() will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++;\
    a_noinsert: ; \
    ailen[row] = nrow1; \
  }

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if
(rp2[t] > col) high2 = t; \ 362 else low2 = t; \ 363 } \ 364 for (_i=low2; _i<high2; _i++) { \ 365 if (rp2[_i] > col) break; \ 366 if (rp2[_i] == col) { \ 367 if (addv == ADD_VALUES) { \ 368 ap2[_i] += value; \ 369 (void)PetscLogFlops(1.0); \ 370 } \ 371 else ap2[_i] = value; \ 372 goto b_noinsert; \ 373 } \ 374 } \ 375 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 376 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 377 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 378 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 379 N = nrow2++ - 1; b->nz++; high2++; \ 380 /* shift up all the later entries in this row */ \ 381 ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\ 382 ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\ 383 rp2[_i] = col; \ 384 ap2[_i] = value; \ 385 B->nonzerostate++; \ 386 b_noinsert: ; \ 387 bilen[row] = nrow2; \ 388 } 389 390 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 391 { 392 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 393 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 394 PetscErrorCode ierr; 395 PetscInt l,*garray = mat->garray,diag; 396 PetscScalar *aa,*ba; 397 398 PetscFunctionBegin; 399 /* code only works for square matrices A */ 400 401 /* find size of row to the left of the diagonal part */ 402 ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr); 403 row = row - diag; 404 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 405 if (garray[b->j[b->i[row]+l]] > diag) break; 406 } 407 if (l) { 408 ierr = MatSeqAIJGetArray(mat->B,&ba);CHKERRQ(ierr); 409 ierr = PetscArraycpy(ba+b->i[row],v,l);CHKERRQ(ierr); 410 ierr = MatSeqAIJRestoreArray(mat->B,&ba);CHKERRQ(ierr); 411 } 412 413 /* diagonal part */ 414 if (a->i[row+1]-a->i[row]) { 415 ierr = MatSeqAIJGetArray(mat->A,&aa);CHKERRQ(ierr); 416 ierr = PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr); 417 ierr = MatSeqAIJRestoreArray(mat->A,&aa);CHKERRQ(ierr); 418 } 419 420 /* right of diagonal part */ 421 if (b->i[row+1]-b->i[row]-l) { 422 ierr = MatSeqAIJGetArray(mat->B,&ba);CHKERRQ(ierr); 423 ierr = PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr); 424 ierr = MatSeqAIJRestoreArray(mat->B,&ba);CHKERRQ(ierr); 425 } 426 PetscFunctionReturn(0); 427 } 428 429 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 430 { 431 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 432 PetscScalar value = 0.0; 433 PetscErrorCode ierr; 434 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 435 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 436 PetscBool roworiented = aij->roworiented; 437 438 /* Some Variables required in the macro */ 439 Mat A = aij->A; 440 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 441 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 442 PetscBool ignorezeroentries = a->ignorezeroentries; 443 Mat B = aij->B; 444 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 445 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 446 MatScalar *aa,*ba; 447 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 448 PetscInt nonew; 449 MatScalar *ap1,*ap2; 450 451 PetscFunctionBegin; 452 
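  /*
     Overview of the insertion logic below:
       - rows owned by this process (rstart <= im[i] < rend) are handled immediately: columns in
         [cstart,cend) go into the diagonal block aij->A with local column index in[j]-cstart, and all
         other columns go into the off-diagonal block aij->B (using aij->colmap/garray to map global to
         local column indices once the matrix has been assembled, or global indices before the first
         assembly);
       - a new off-diagonal column on an already assembled matrix triggers MatDisAssemble_MPIAIJ(), which
         switches aij->B back to global column indices so the entry can be inserted (provided new
         nonzeros are allowed in B);
       - rows owned by other processes are an error if MAT_NO_OFF_PROC_ENTRIES was set, are silently
         skipped if MAT_IGNORE_OFF_PROC_ENTRIES (donotstash) was set, and are otherwise collected in
         mat->stash and communicated during MatAssemblyBegin/End().
  */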
ierr = MatSeqAIJGetArray(A,&aa);CHKERRQ(ierr); 453 ierr = MatSeqAIJGetArray(B,&ba);CHKERRQ(ierr); 454 for (i=0; i<m; i++) { 455 if (im[i] < 0) continue; 456 if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 457 if (im[i] >= rstart && im[i] < rend) { 458 row = im[i] - rstart; 459 lastcol1 = -1; 460 rp1 = aj + ai[row]; 461 ap1 = aa + ai[row]; 462 rmax1 = aimax[row]; 463 nrow1 = ailen[row]; 464 low1 = 0; 465 high1 = nrow1; 466 lastcol2 = -1; 467 rp2 = bj + bi[row]; 468 ap2 = ba + bi[row]; 469 rmax2 = bimax[row]; 470 nrow2 = bilen[row]; 471 low2 = 0; 472 high2 = nrow2; 473 474 for (j=0; j<n; j++) { 475 if (v) value = roworiented ? v[i*n+j] : v[i+j*m]; 476 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 477 if (in[j] >= cstart && in[j] < cend) { 478 col = in[j] - cstart; 479 nonew = a->nonew; 480 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 481 } else if (in[j] < 0) continue; 482 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 483 else { 484 if (mat->was_assembled) { 485 if (!aij->colmap) { 486 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 487 } 488 #if defined(PETSC_USE_CTABLE) 489 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); /* map global col ids to local ones */ 490 col--; 491 #else 492 col = aij->colmap[in[j]] - 1; 493 #endif 494 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */ 495 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); /* Change aij->B from reduced/local format to expanded/global format */ 496 col = in[j]; 497 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 498 B = aij->B; 499 b = (Mat_SeqAIJ*)B->data; 500 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 501 rp2 = bj + bi[row]; 502 ap2 = ba + bi[row]; 503 rmax2 = bimax[row]; 504 nrow2 = bilen[row]; 505 low2 = 0; 506 high2 = nrow2; 507 bm = aij->B->rmap->n; 508 ba = b->a; 509 } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 510 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 511 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 512 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 513 } 514 } else col = in[j]; 515 nonew = b->nonew; 516 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 517 } 518 } 519 } else { 520 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 521 if (!aij->donotstash) { 522 mat->assembled = PETSC_FALSE; 523 if (roworiented) { 524 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 525 } else { 526 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 527 } 528 } 529 } 530 } 531 ierr = MatSeqAIJRestoreArray(A,&aa);CHKERRQ(ierr); 532 ierr = MatSeqAIJRestoreArray(B,&ba);CHKERRQ(ierr); 533 PetscFunctionReturn(0); 534 } 535 536 /* 537 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an 
MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        A    = aij->A; /* diagonal part of the matrix */
  Mat        B    = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am     = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz;
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ*)mat->data;
  Mat        A     = aij->A; /* diagonal part of the matrix */
  Mat        B     = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *aijd = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am     = aij->A->rmap->n,j;
  PetscInt   *full_diag_i = aijd->i,*full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point.
*/ 596 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 597 PetscScalar *aa = a->a,*ba = b->a; 598 599 PetscFunctionBegin; 600 /* Iterate over all rows of the matrix */ 601 for (j=0; j<am; j++) { 602 dnz_row = onz_row = 0; 603 rowstart_offd = full_offd_i[j]; 604 rowstart_diag = full_diag_i[j]; 605 /* Iterate over all non-zero columns of the current row */ 606 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 607 /* If column is in the diagonal */ 608 if (mat_j[col] >= cstart && mat_j[col] < cend) { 609 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 610 aa[rowstart_diag+dnz_row] = mat_a[col]; 611 dnz_row++; 612 } else { /* off-diagonal entries */ 613 bj[rowstart_offd+onz_row] = mat_j[col]; 614 ba[rowstart_offd+onz_row] = mat_a[col]; 615 onz_row++; 616 } 617 } 618 ailen[j] = dnz_row; 619 bilen[j] = onz_row; 620 } 621 PetscFunctionReturn(0); 622 } 623 624 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 625 { 626 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 627 PetscErrorCode ierr; 628 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 629 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 630 631 PetscFunctionBegin; 632 for (i=0; i<m; i++) { 633 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 634 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 635 if (idxm[i] >= rstart && idxm[i] < rend) { 636 row = idxm[i] - rstart; 637 for (j=0; j<n; j++) { 638 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 639 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 640 if (idxn[j] >= cstart && idxn[j] < cend) { 641 col = idxn[j] - cstart; 642 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 643 } else { 644 if (!aij->colmap) { 645 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 646 } 647 #if defined(PETSC_USE_CTABLE) 648 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 649 col--; 650 #else 651 col = aij->colmap[idxn[j]] - 1; 652 #endif 653 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 654 else { 655 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 656 } 657 } 658 } 659 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 660 } 661 PetscFunctionReturn(0); 662 } 663 664 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 665 { 666 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 667 PetscErrorCode ierr; 668 PetscInt nstash,reallocs; 669 670 PetscFunctionBegin; 671 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 672 673 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 674 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 675 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 676 PetscFunctionReturn(0); 677 } 678 679 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 680 { 681 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 682 PetscErrorCode ierr; 683 PetscMPIInt n; 684 PetscInt i,j,rstart,ncols,flg; 685 PetscInt *row,*col; 686 PetscBool other_disassembled; 687 PetscScalar *val; 688 689 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be 
reset in disassembly */ 690 691 PetscFunctionBegin; 692 if (!aij->donotstash && !mat->nooffprocentries) { 693 while (1) { 694 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 695 if (!flg) break; 696 697 for (i=0; i<n;) { 698 /* Now identify the consecutive vals belonging to the same row */ 699 for (j=i,rstart=row[j]; j<n; j++) { 700 if (row[j] != rstart) break; 701 } 702 if (j < n) ncols = j-i; 703 else ncols = n-i; 704 /* Now assemble all these values with a single function call */ 705 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 706 i = j; 707 } 708 } 709 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 710 } 711 #if defined(PETSC_HAVE_DEVICE) 712 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 713 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 714 if (mat->boundtocpu) { 715 ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr); 716 ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr); 717 } 718 #endif 719 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 720 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 721 722 /* determine if any processor has disassembled, if so we must 723 also disassemble ourself, in order that we may reassemble. */ 724 /* 725 if nonzero structure of submatrix B cannot change then we know that 726 no processor disassembled thus we can skip this stuff 727 */ 728 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 729 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 730 if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globaly it does not */ 731 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 732 } 733 } 734 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 735 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 736 } 737 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 738 #if defined(PETSC_HAVE_DEVICE) 739 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 740 #endif 741 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 742 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 743 744 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 745 746 aij->rowvalues = NULL; 747 748 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 749 750 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 751 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 752 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 753 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 754 } 755 #if defined(PETSC_HAVE_DEVICE) 756 mat->offloadmask = PETSC_OFFLOAD_BOTH; 757 #endif 758 PetscFunctionReturn(0); 759 } 760 761 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 762 { 763 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 764 PetscErrorCode ierr; 765 766 PetscFunctionBegin; 767 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 768 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 769 PetscFunctionReturn(0); 770 } 771 772 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 773 { 774 Mat_MPIAIJ *mat = 
(Mat_MPIAIJ *) A->data; 775 PetscObjectState sA, sB; 776 PetscInt *lrows; 777 PetscInt r, len; 778 PetscBool cong, lch, gch; 779 PetscErrorCode ierr; 780 781 PetscFunctionBegin; 782 /* get locally owned rows */ 783 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 784 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 785 /* fix right hand side if needed */ 786 if (x && b) { 787 const PetscScalar *xx; 788 PetscScalar *bb; 789 790 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 791 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 792 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 793 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 794 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 795 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 796 } 797 798 sA = mat->A->nonzerostate; 799 sB = mat->B->nonzerostate; 800 801 if (diag != 0.0 && cong) { 802 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 803 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 804 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 805 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 806 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 807 PetscInt nnwA, nnwB; 808 PetscBool nnzA, nnzB; 809 810 nnwA = aijA->nonew; 811 nnwB = aijB->nonew; 812 nnzA = aijA->keepnonzeropattern; 813 nnzB = aijB->keepnonzeropattern; 814 if (!nnzA) { 815 ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr); 816 aijA->nonew = 0; 817 } 818 if (!nnzB) { 819 ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr); 820 aijB->nonew = 0; 821 } 822 /* Must zero here before the next loop */ 823 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 824 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 825 for (r = 0; r < len; ++r) { 826 const PetscInt row = lrows[r] + A->rmap->rstart; 827 if (row >= A->cmap->N) continue; 828 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 829 } 830 aijA->nonew = nnwA; 831 aijB->nonew = nnwB; 832 } else { 833 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 834 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 835 } 836 ierr = PetscFree(lrows);CHKERRQ(ierr); 837 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 838 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 839 840 /* reduce nonzerostate */ 841 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 842 ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 843 if (gch) A->nonzerostate++; 844 PetscFunctionReturn(0); 845 } 846 847 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 848 { 849 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 850 PetscErrorCode ierr; 851 PetscMPIInt n = A->rmap->n; 852 PetscInt i,j,r,m,len = 0; 853 PetscInt *lrows,*owners = A->rmap->range; 854 PetscMPIInt p = 0; 855 PetscSFNode *rrows; 856 PetscSF sf; 857 const PetscScalar *xx; 858 PetscScalar *bb,*mask,*aij_a; 859 Vec xmask,lmask; 860 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 861 const PetscInt *aj, *ii,*ridx; 862 PetscScalar *aa; 863 864 PetscFunctionBegin; 
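  /*
     The routine proceeds in four steps:
       1) a PetscSF with the owned rows as roots translates the (possibly off-process) global row indices
          in rows[] into the list lrows[] of locally owned rows that must be zeroed;
       2) MatZeroRowsColumns() on the diagonal block l->A zeroes its rows and columns, places diag on the
          zeroed diagonal entries, and fixes b from x for that block when both vectors are given;
       3) an indicator vector with 1 in every zeroed row/column index is scattered through l->Mvctx into
          lmask so each process knows which of its ghost (off-diagonal) columns were zeroed elsewhere;
       4) the zeroed rows of the off-diagonal block l->B are cleared directly, and every entry of B in a
          masked column is zeroed, subtracting a_ij*x_j from b when x and b are provided.
  */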
865 /* Create SF where leaves are input rows and roots are owned rows */ 866 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 867 for (r = 0; r < n; ++r) lrows[r] = -1; 868 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 869 for (r = 0; r < N; ++r) { 870 const PetscInt idx = rows[r]; 871 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 872 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 873 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 874 } 875 rrows[r].rank = p; 876 rrows[r].index = rows[r] - owners[p]; 877 } 878 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 879 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 880 /* Collect flags for rows to be zeroed */ 881 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 882 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 883 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 884 /* Compress and put in row numbers */ 885 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 886 /* zero diagonal part of matrix */ 887 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 888 /* handle off diagonal part of matrix */ 889 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 890 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 891 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 892 for (i=0; i<len; i++) bb[lrows[i]] = 1; 893 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 894 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 895 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 896 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 897 if (x && b) { /* this code is buggy when the row and column layout don't match */ 898 PetscBool cong; 899 900 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 901 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 902 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 903 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 904 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 905 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 906 } 907 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 908 /* remove zeroed rows of off diagonal matrix */ 909 ierr = MatSeqAIJGetArray(l->B,&aij_a);CHKERRQ(ierr); 910 ii = aij->i; 911 for (i=0; i<len; i++) { 912 ierr = PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr); 913 } 914 /* loop over all elements of off process part of matrix zeroing removed columns*/ 915 if (aij->compressedrow.use) { 916 m = aij->compressedrow.nrows; 917 ii = aij->compressedrow.i; 918 ridx = aij->compressedrow.rindex; 919 for (i=0; i<m; i++) { 920 n = ii[i+1] - ii[i]; 921 aj = aij->j + ii[i]; 922 aa = aij_a + ii[i]; 923 924 for (j=0; j<n; j++) { 925 if (PetscAbsScalar(mask[*aj])) { 926 if (b) bb[*ridx] -= *aa*xx[*aj]; 927 *aa = 0.0; 928 } 929 aa++; 930 aj++; 931 } 932 ridx++; 933 } 934 } else { /* do not use compressed row format */ 935 m = l->B->rmap->n; 936 for (i=0; i<m; i++) { 937 n = ii[i+1] - ii[i]; 938 aj = aij->j + ii[i]; 939 aa = aij_a + ii[i]; 940 for (j=0; j<n; j++) { 941 if (PetscAbsScalar(mask[*aj])) { 942 if (b) bb[i] -= *aa*xx[*aj]; 943 *aa = 0.0; 944 } 945 aa++; 946 aj++; 947 } 948 } 949 } 
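  /* bb and xx were only obtained above when both x and b were passed in, so they are restored under the
     same condition; the mask vector and the temporary row list are then released, and the nonzero state
     is reduced across ranks only when the pattern was allowed to change */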
950 if (x && b) { 951 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 952 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 953 } 954 ierr = MatSeqAIJRestoreArray(l->B,&aij_a);CHKERRQ(ierr); 955 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 956 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 957 ierr = PetscFree(lrows);CHKERRQ(ierr); 958 959 /* only change matrix nonzero state if pattern was allowed to be changed */ 960 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 961 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 962 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 963 } 964 PetscFunctionReturn(0); 965 } 966 967 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 968 { 969 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 970 PetscErrorCode ierr; 971 PetscInt nt; 972 VecScatter Mvctx = a->Mvctx; 973 974 PetscFunctionBegin; 975 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 976 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 977 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 978 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 979 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 980 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 981 PetscFunctionReturn(0); 982 } 983 984 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 985 { 986 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 987 PetscErrorCode ierr; 988 989 PetscFunctionBegin; 990 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 991 PetscFunctionReturn(0); 992 } 993 994 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 995 { 996 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 997 PetscErrorCode ierr; 998 VecScatter Mvctx = a->Mvctx; 999 1000 PetscFunctionBegin; 1001 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1002 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1003 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1004 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1005 PetscFunctionReturn(0); 1006 } 1007 1008 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1009 { 1010 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1011 PetscErrorCode ierr; 1012 1013 PetscFunctionBegin; 1014 /* do nondiagonal part */ 1015 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1016 /* do local part */ 1017 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1018 /* add partial results together */ 1019 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1020 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1021 PetscFunctionReturn(0); 1022 } 1023 1024 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1025 { 1026 MPI_Comm comm; 1027 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1028 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1029 IS Me,Notme; 1030 PetscErrorCode ierr; 1031 PetscInt M,N,first,last,*notme,i; 1032 PetscBool lf; 1033 PetscMPIInt size; 1034 1035 PetscFunctionBegin; 1036 /* Easy test: symmetric diagonal block */ 1037 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1038 ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 1039 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRMPI(ierr); 1040 if (!*f) 
PetscFunctionReturn(0); 1041 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1042 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 1043 if (size == 1) PetscFunctionReturn(0); 1044 1045 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1046 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1047 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1048 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1049 for (i=0; i<first; i++) notme[i] = i; 1050 for (i=last; i<M; i++) notme[i-last+first] = i; 1051 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1052 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1053 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1054 Aoff = Aoffs[0]; 1055 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1056 Boff = Boffs[0]; 1057 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1058 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1059 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1060 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1061 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1062 ierr = PetscFree(notme);CHKERRQ(ierr); 1063 PetscFunctionReturn(0); 1064 } 1065 1066 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1067 { 1068 PetscErrorCode ierr; 1069 1070 PetscFunctionBegin; 1071 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1072 PetscFunctionReturn(0); 1073 } 1074 1075 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1076 { 1077 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1078 PetscErrorCode ierr; 1079 1080 PetscFunctionBegin; 1081 /* do nondiagonal part */ 1082 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1083 /* do local part */ 1084 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1085 /* add partial results together */ 1086 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1087 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1088 PetscFunctionReturn(0); 1089 } 1090 1091 /* 1092 This only works correctly for square matrices where the subblock A->A is the 1093 diagonal block 1094 */ 1095 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1096 { 1097 PetscErrorCode ierr; 1098 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1099 1100 PetscFunctionBegin; 1101 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1102 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1103 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1104 PetscFunctionReturn(0); 1105 } 1106 1107 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1108 { 1109 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1110 PetscErrorCode ierr; 1111 1112 PetscFunctionBegin; 1113 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1114 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1115 PetscFunctionReturn(0); 1116 } 1117 1118 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1119 { 1120 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1121 PetscErrorCode ierr; 1122 1123 PetscFunctionBegin; 1124 #if defined(PETSC_USE_LOG) 1125 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1126 #endif 1127 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1128 ierr = 
VecDestroy(&aij->diag);CHKERRQ(ierr); 1129 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1130 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1131 #if defined(PETSC_USE_CTABLE) 1132 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1133 #else 1134 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1135 #endif 1136 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1137 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1138 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1139 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1140 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1141 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1142 1143 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 1144 ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr); 1145 1146 ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr); 1147 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1148 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1149 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1150 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1151 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1152 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1153 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1154 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr); 1155 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1156 #if defined(PETSC_HAVE_CUDA) 1157 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr); 1158 #endif 1159 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 1160 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr); 1161 #endif 1162 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 1163 #if defined(PETSC_HAVE_ELEMENTAL) 1164 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1165 #endif 1166 #if defined(PETSC_HAVE_SCALAPACK) 1167 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr); 1168 #endif 1169 #if defined(PETSC_HAVE_HYPRE) 1170 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1171 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1172 #endif 1173 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1174 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr); 1175 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1176 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr); 1177 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr); 1178 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr); 1179 #if defined(PETSC_HAVE_MKL_SPARSE) 1180 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr); 1181 #endif 1182 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr); 1183 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1184 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr); 1185 PetscFunctionReturn(0); 1186 } 1187 1188 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1189 { 1190 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1191 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1192 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1193 const PetscInt *garray = aij->garray; 1194 const PetscScalar *aa,*ba; 1195 PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb; 1196 PetscInt *rowlens; 1197 PetscInt *colidxs; 1198 PetscScalar *matvals; 1199 PetscErrorCode ierr; 1200 1201 PetscFunctionBegin; 1202 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1203 1204 M = mat->rmap->N; 1205 N = mat->cmap->N; 1206 m = mat->rmap->n; 1207 rs = mat->rmap->rstart; 1208 cs = mat->cmap->rstart; 1209 nz = A->nz + B->nz; 1210 1211 /* write matrix header */ 1212 header[0] = MAT_FILE_CLASSID; 1213 header[1] = M; header[2] = N; header[3] = nz; 1214 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1215 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1216 1217 /* fill in and store row lengths */ 1218 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1219 for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1220 ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr); 1221 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1222 1223 /* fill in and store column indices */ 1224 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1225 for (cnt=0, i=0; i<m; i++) { 1226 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1227 if (garray[B->j[jb]] > cs) break; 1228 colidxs[cnt++] = garray[B->j[jb]]; 1229 } 1230 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1231 colidxs[cnt++] = A->j[ja] + cs; 1232 for (; jb<B->i[i+1]; jb++) 1233 colidxs[cnt++] = garray[B->j[jb]]; 1234 } 1235 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1236 ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 1237 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1238 1239 /* fill in and store nonzero values */ 1240 ierr = MatSeqAIJGetArrayRead(aij->A,&aa);CHKERRQ(ierr); 1241 ierr = MatSeqAIJGetArrayRead(aij->B,&ba);CHKERRQ(ierr); 1242 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1243 for (cnt=0, i=0; i<m; i++) { 1244 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1245 if (garray[B->j[jb]] > cs) break; 1246 matvals[cnt++] = ba[jb]; 1247 } 1248 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1249 matvals[cnt++] = aa[ja]; 1250 for (; jb<B->i[i+1]; jb++) 1251 matvals[cnt++] = ba[jb]; 1252 } 1253 ierr = MatSeqAIJRestoreArrayRead(aij->A,&aa);CHKERRQ(ierr); 1254 ierr = MatSeqAIJRestoreArrayRead(aij->B,&ba);CHKERRQ(ierr); 1255 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1256 ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 1257 ierr = PetscFree(matvals);CHKERRQ(ierr); 1258 1259 /* write block size option to the viewer's .info file */ 1260 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1261 PetscFunctionReturn(0); 
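  /* The resulting binary file contains: a 4-entry header {MAT_FILE_CLASSID, M, N, global nz}, the
     per-row nonzero counts, the global column indices, and finally the nonzero values; within each row
     the off-diagonal columns to the left of the diagonal block are written first, then the diagonal
     block, then the remaining off-diagonal columns, so every row's columns appear in increasing global
     order */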
1262 } 1263 1264 #include <petscdraw.h> 1265 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1266 { 1267 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1268 PetscErrorCode ierr; 1269 PetscMPIInt rank = aij->rank,size = aij->size; 1270 PetscBool isdraw,iascii,isbinary; 1271 PetscViewer sviewer; 1272 PetscViewerFormat format; 1273 1274 PetscFunctionBegin; 1275 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1276 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1277 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1278 if (iascii) { 1279 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1280 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1281 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1282 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1283 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1284 for (i=0; i<(PetscInt)size; i++) { 1285 nmax = PetscMax(nmax,nz[i]); 1286 nmin = PetscMin(nmin,nz[i]); 1287 navg += nz[i]; 1288 } 1289 ierr = PetscFree(nz);CHKERRQ(ierr); 1290 navg = navg/size; 1291 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1292 PetscFunctionReturn(0); 1293 } 1294 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1295 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1296 MatInfo info; 1297 PetscInt *inodes=NULL; 1298 1299 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr); 1300 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1301 ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr); 1302 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1303 if (!inodes) { 1304 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1305 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1306 } else { 1307 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1308 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1309 } 1310 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1311 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1312 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1313 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1314 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1315 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1316 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1317 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1318 PetscFunctionReturn(0); 1319 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1320 PetscInt inodecount,inodelimit,*inodes; 1321 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1322 if (inodes) { 1323 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1324 } else { 1325 ierr = 
PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1326 } 1327 PetscFunctionReturn(0); 1328 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1329 PetscFunctionReturn(0); 1330 } 1331 } else if (isbinary) { 1332 if (size == 1) { 1333 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1334 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1335 } else { 1336 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1337 } 1338 PetscFunctionReturn(0); 1339 } else if (iascii && size == 1) { 1340 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1341 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1342 PetscFunctionReturn(0); 1343 } else if (isdraw) { 1344 PetscDraw draw; 1345 PetscBool isnull; 1346 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1347 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1348 if (isnull) PetscFunctionReturn(0); 1349 } 1350 1351 { /* assemble the entire matrix onto first processor */ 1352 Mat A = NULL, Av; 1353 IS isrow,iscol; 1354 1355 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1356 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1357 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1358 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1359 /* The commented code uses MatCreateSubMatrices instead */ 1360 /* 1361 Mat *AA, A = NULL, Av; 1362 IS isrow,iscol; 1363 1364 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1365 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? 
mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1366 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1367 if (rank == 0) { 1368 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1369 A = AA[0]; 1370 Av = AA[0]; 1371 } 1372 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1373 */ 1374 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1375 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1376 /* 1377 Everyone has to call to draw the matrix since the graphics waits are 1378 synchronized across all processors that share the PetscDraw object 1379 */ 1380 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1381 if (rank == 0) { 1382 if (((PetscObject)mat)->name) { 1383 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1384 } 1385 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1386 } 1387 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1388 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1389 ierr = MatDestroy(&A);CHKERRQ(ierr); 1390 } 1391 PetscFunctionReturn(0); 1392 } 1393 1394 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1395 { 1396 PetscErrorCode ierr; 1397 PetscBool iascii,isdraw,issocket,isbinary; 1398 1399 PetscFunctionBegin; 1400 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1401 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1402 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1403 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1404 if (iascii || isdraw || isbinary || issocket) { 1405 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1406 } 1407 PetscFunctionReturn(0); 1408 } 1409 1410 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1411 { 1412 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1413 PetscErrorCode ierr; 1414 Vec bb1 = NULL; 1415 PetscBool hasop; 1416 1417 PetscFunctionBegin; 1418 if (flag == SOR_APPLY_UPPER) { 1419 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1420 PetscFunctionReturn(0); 1421 } 1422 1423 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1424 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1425 } 1426 1427 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1428 if (flag & SOR_ZERO_INITIAL_GUESS) { 1429 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1430 its--; 1431 } 1432 1433 while (its--) { 1434 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1435 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1436 1437 /* update rhs: bb1 = bb - B*x */ 1438 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1439 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1440 1441 /* local sweep */ 1442 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1443 } 1444 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1445 if (flag & SOR_ZERO_INITIAL_GUESS) { 1446 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1447 its--; 1448 } 1449 while (its--) { 1450 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1451 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1452 1453 /* update rhs: bb1 = bb - B*x */ 1454 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1455 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1456 1457 /* local sweep */ 1458 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1459 } 1460 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1461 if (flag & SOR_ZERO_INITIAL_GUESS) { 1462 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1463 its--; 1464 } 1465 while (its--) { 1466 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1467 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1468 1469 /* update rhs: bb1 = bb - B*x */ 1470 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1471 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1472 1473 /* local sweep */ 1474 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1475 } 1476 } else if (flag & SOR_EISENSTAT) { 1477 Vec xx1; 1478 1479 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1480 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1481 1482 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1483 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1484 if (!mat->diag) { 1485 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1486 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1487 } 1488 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1489 if (hasop) { 1490 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1491 } else { 1492 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1493 } 1494 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1495 1496 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1497 1498 /* local sweep */ 1499 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1500 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1501 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1502 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1503 1504 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1505 1506 matin->factorerrortype = mat->A->factorerrortype; 1507 PetscFunctionReturn(0); 1508 } 1509 1510 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1511 { 1512 Mat aA,aB,Aperm; 1513 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1514 PetscScalar *aa,*ba; 1515 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1516 PetscSF rowsf,sf; 1517 IS parcolp = NULL; 1518 PetscBool done; 1519 PetscErrorCode ierr; 1520 1521 PetscFunctionBegin; 1522 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1523 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1524 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1525 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1526 1527 /* Invert row permutation to find out where my rows should go */ 1528 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1529 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1530 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 
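/* How the inversion works: each local row i is a leaf of rowsf pointing at the root that owns
   global row rwant[i]. Loading work[i] with this process's own global row number (rstart + i)
   and reducing with MPI_REPLACE deposits, at every original row, the global position it should
   occupy in the permuted matrix, so rdest[i] below is the destination row of local row i. */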
1531 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1532 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1533 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1534 1535 /* Invert column permutation to find out where my columns should go */ 1536 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1537 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1538 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1539 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1540 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1541 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1542 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1543 1544 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1545 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1546 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1547 1548 /* Find out where my gcols should go */ 1549 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1550 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1551 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1552 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1553 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1554 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr); 1555 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr); 1556 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1557 1558 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1559 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1560 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1561 for (i=0; i<m; i++) { 1562 PetscInt row = rdest[i]; 1563 PetscMPIInt rowner; 1564 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1565 for (j=ai[i]; j<ai[i+1]; j++) { 1566 PetscInt col = cdest[aj[j]]; 1567 PetscMPIInt cowner; 1568 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1569 if (rowner == cowner) dnnz[i]++; 1570 else onnz[i]++; 1571 } 1572 for (j=bi[i]; j<bi[i+1]; j++) { 1573 PetscInt col = gcdest[bj[j]]; 1574 PetscMPIInt cowner; 1575 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1576 if (rowner == cowner) dnnz[i]++; 1577 else onnz[i]++; 1578 } 1579 } 1580 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr); 1581 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr); 1582 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr); 1583 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr); 1584 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1585 1586 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1587 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1588 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1589 for (i=0; i<m; i++) { 1590 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1591 PetscInt j0,rowlen; 1592 rowlen = ai[i+1] - ai[i]; 1593 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1594 for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1595 ierr = 
MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1596 } 1597 rowlen = bi[i+1] - bi[i]; 1598 for (j0=j=0; j<rowlen; j0=j) { 1599 for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1600 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1601 } 1602 } 1603 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1604 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1605 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1606 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1607 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1608 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1609 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1610 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1611 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1612 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1613 *B = Aperm; 1614 PetscFunctionReturn(0); 1615 } 1616 1617 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1618 { 1619 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1620 PetscErrorCode ierr; 1621 1622 PetscFunctionBegin; 1623 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1624 if (ghosts) *ghosts = aij->garray; 1625 PetscFunctionReturn(0); 1626 } 1627 1628 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1629 { 1630 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1631 Mat A = mat->A,B = mat->B; 1632 PetscErrorCode ierr; 1633 PetscLogDouble isend[5],irecv[5]; 1634 1635 PetscFunctionBegin; 1636 info->block_size = 1.0; 1637 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1638 1639 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1640 isend[3] = info->memory; isend[4] = info->mallocs; 1641 1642 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1643 1644 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1645 isend[3] += info->memory; isend[4] += info->mallocs; 1646 if (flag == MAT_LOCAL) { 1647 info->nz_used = isend[0]; 1648 info->nz_allocated = isend[1]; 1649 info->nz_unneeded = isend[2]; 1650 info->memory = isend[3]; 1651 info->mallocs = isend[4]; 1652 } else if (flag == MAT_GLOBAL_MAX) { 1653 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr); 1654 1655 info->nz_used = irecv[0]; 1656 info->nz_allocated = irecv[1]; 1657 info->nz_unneeded = irecv[2]; 1658 info->memory = irecv[3]; 1659 info->mallocs = irecv[4]; 1660 } else if (flag == MAT_GLOBAL_SUM) { 1661 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr); 1662 1663 info->nz_used = irecv[0]; 1664 info->nz_allocated = irecv[1]; 1665 info->nz_unneeded = irecv[2]; 1666 info->memory = irecv[3]; 1667 info->mallocs = irecv[4]; 1668 } 1669 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1670 info->fill_ratio_needed = 0; 1671 info->factor_mallocs = 0; 1672 PetscFunctionReturn(0); 1673 } 1674 1675 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1676 { 1677 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1678 PetscErrorCode ierr; 1679 1680 PetscFunctionBegin; 1681 switch (op) { 1682 case MAT_NEW_NONZERO_LOCATIONS: 1683 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1684 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1685 case MAT_KEEP_NONZERO_PATTERN: 1686 case 
MAT_NEW_NONZERO_LOCATION_ERR: 1687 case MAT_USE_INODES: 1688 case MAT_IGNORE_ZERO_ENTRIES: 1689 case MAT_FORM_EXPLICIT_TRANSPOSE: 1690 MatCheckPreallocated(A,1); 1691 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1692 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1693 break; 1694 case MAT_ROW_ORIENTED: 1695 MatCheckPreallocated(A,1); 1696 a->roworiented = flg; 1697 1698 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1699 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1700 break; 1701 case MAT_FORCE_DIAGONAL_ENTRIES: 1702 case MAT_SORTED_FULL: 1703 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1704 break; 1705 case MAT_IGNORE_OFF_PROC_ENTRIES: 1706 a->donotstash = flg; 1707 break; 1708 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1709 case MAT_SPD: 1710 case MAT_SYMMETRIC: 1711 case MAT_STRUCTURALLY_SYMMETRIC: 1712 case MAT_HERMITIAN: 1713 case MAT_SYMMETRY_ETERNAL: 1714 break; 1715 case MAT_SUBMAT_SINGLEIS: 1716 A->submat_singleis = flg; 1717 break; 1718 case MAT_STRUCTURE_ONLY: 1719 /* The option is handled directly by MatSetOption() */ 1720 break; 1721 default: 1722 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1723 } 1724 PetscFunctionReturn(0); 1725 } 1726 1727 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1728 { 1729 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1730 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1731 PetscErrorCode ierr; 1732 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1733 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1734 PetscInt *cmap,*idx_p; 1735 1736 PetscFunctionBegin; 1737 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1738 mat->getrowactive = PETSC_TRUE; 1739 1740 if (!mat->rowvalues && (idx || v)) { 1741 /* 1742 allocate enough space to hold information from the longest row. 
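      The maximum is found below by summing, for each local row, the number of stored entries
      in the diagonal block (A) and in the off-diagonal block (B).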
1743 */ 1744 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1745 PetscInt max = 1,tmp; 1746 for (i=0; i<matin->rmap->n; i++) { 1747 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1748 if (max < tmp) max = tmp; 1749 } 1750 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1751 } 1752 1753 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1754 lrow = row - rstart; 1755 1756 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1757 if (!v) {pvA = NULL; pvB = NULL;} 1758 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1759 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1760 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1761 nztot = nzA + nzB; 1762 1763 cmap = mat->garray; 1764 if (v || idx) { 1765 if (nztot) { 1766 /* Sort by increasing column numbers, assuming A and B already sorted */ 1767 PetscInt imark = -1; 1768 if (v) { 1769 *v = v_p = mat->rowvalues; 1770 for (i=0; i<nzB; i++) { 1771 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1772 else break; 1773 } 1774 imark = i; 1775 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1776 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1777 } 1778 if (idx) { 1779 *idx = idx_p = mat->rowindices; 1780 if (imark > -1) { 1781 for (i=0; i<imark; i++) { 1782 idx_p[i] = cmap[cworkB[i]]; 1783 } 1784 } else { 1785 for (i=0; i<nzB; i++) { 1786 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1787 else break; 1788 } 1789 imark = i; 1790 } 1791 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1792 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1793 } 1794 } else { 1795 if (idx) *idx = NULL; 1796 if (v) *v = NULL; 1797 } 1798 } 1799 *nz = nztot; 1800 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1801 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1802 PetscFunctionReturn(0); 1803 } 1804 1805 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1806 { 1807 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1808 1809 PetscFunctionBegin; 1810 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1811 aij->getrowactive = PETSC_FALSE; 1812 PetscFunctionReturn(0); 1813 } 1814 1815 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1816 { 1817 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1818 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1819 PetscErrorCode ierr; 1820 PetscInt i,j,cstart = mat->cmap->rstart; 1821 PetscReal sum = 0.0; 1822 const MatScalar *v,*amata,*bmata; 1823 1824 PetscFunctionBegin; 1825 if (aij->size == 1) { 1826 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1827 } else { 1828 ierr = MatSeqAIJGetArrayRead(aij->A,&amata);CHKERRQ(ierr); 1829 ierr = MatSeqAIJGetArrayRead(aij->B,&bmata);CHKERRQ(ierr); 1830 if (type == NORM_FROBENIUS) { 1831 v = amata; 1832 for (i=0; i<amat->nz; i++) { 1833 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1834 } 1835 v = bmata; 1836 for (i=0; i<bmat->nz; i++) { 1837 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1838 } 1839 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1840 *norm = PetscSqrtReal(*norm); 1841 ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr); 1842 } else if (type == NORM_1) { /* max column norm */ 1843 PetscReal *tmp,*tmp2; 1844 PetscInt 
*jj,*garray = aij->garray; 1845 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1846 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1847 *norm = 0.0; 1848 v = amata; jj = amat->j; 1849 for (j=0; j<amat->nz; j++) { 1850 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1851 } 1852 v = bmata; jj = bmat->j; 1853 for (j=0; j<bmat->nz; j++) { 1854 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1855 } 1856 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1857 for (j=0; j<mat->cmap->N; j++) { 1858 if (tmp2[j] > *norm) *norm = tmp2[j]; 1859 } 1860 ierr = PetscFree(tmp);CHKERRQ(ierr); 1861 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1862 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1863 } else if (type == NORM_INFINITY) { /* max row norm */ 1864 PetscReal ntemp = 0.0; 1865 for (j=0; j<aij->A->rmap->n; j++) { 1866 v = amata + amat->i[j]; 1867 sum = 0.0; 1868 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1869 sum += PetscAbsScalar(*v); v++; 1870 } 1871 v = bmata + bmat->i[j]; 1872 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1873 sum += PetscAbsScalar(*v); v++; 1874 } 1875 if (sum > ntemp) ntemp = sum; 1876 } 1877 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1878 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1879 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1880 ierr = MatSeqAIJRestoreArrayRead(aij->A,&amata);CHKERRQ(ierr); 1881 ierr = MatSeqAIJRestoreArrayRead(aij->B,&bmata);CHKERRQ(ierr); 1882 } 1883 PetscFunctionReturn(0); 1884 } 1885 1886 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1887 { 1888 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 1889 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 1890 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 1891 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 1892 PetscErrorCode ierr; 1893 Mat B,A_diag,*B_diag; 1894 const MatScalar *pbv,*bv; 1895 1896 PetscFunctionBegin; 1897 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1898 ai = Aloc->i; aj = Aloc->j; 1899 bi = Bloc->i; bj = Bloc->j; 1900 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1901 PetscInt *d_nnz,*g_nnz,*o_nnz; 1902 PetscSFNode *oloc; 1903 PETSC_UNUSED PetscSF sf; 1904 1905 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1906 /* compute d_nnz for preallocation */ 1907 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 1908 for (i=0; i<ai[ma]; i++) { 1909 d_nnz[aj[i]]++; 1910 } 1911 /* compute local off-diagonal contributions */ 1912 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 1913 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1914 /* map those to global */ 1915 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1916 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1917 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1918 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 1919 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1920 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1921 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1922 1923 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1924 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1925 ierr = 
MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1926 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1927 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1928 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1929 } else { 1930 B = *matout; 1931 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1932 } 1933 1934 b = (Mat_MPIAIJ*)B->data; 1935 A_diag = a->A; 1936 B_diag = &b->A; 1937 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 1938 A_diag_ncol = A_diag->cmap->N; 1939 B_diag_ilen = sub_B_diag->ilen; 1940 B_diag_i = sub_B_diag->i; 1941 1942 /* Set ilen for diagonal of B */ 1943 for (i=0; i<A_diag_ncol; i++) { 1944 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 1945 } 1946 1947 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 1948 very quickly (=without using MatSetValues), because all writes are local. */ 1949 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 1950 1951 /* copy over the B part */ 1952 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 1953 ierr = MatSeqAIJGetArrayRead(a->B,&bv);CHKERRQ(ierr); 1954 pbv = bv; 1955 row = A->rmap->rstart; 1956 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1957 cols_tmp = cols; 1958 for (i=0; i<mb; i++) { 1959 ncol = bi[i+1]-bi[i]; 1960 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES);CHKERRQ(ierr); 1961 row++; 1962 pbv += ncol; cols_tmp += ncol; 1963 } 1964 ierr = PetscFree(cols);CHKERRQ(ierr); 1965 ierr = MatSeqAIJRestoreArrayRead(a->B,&bv);CHKERRQ(ierr); 1966 1967 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1968 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1969 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1970 *matout = B; 1971 } else { 1972 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 1973 } 1974 PetscFunctionReturn(0); 1975 } 1976 1977 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1978 { 1979 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1980 Mat a = aij->A,b = aij->B; 1981 PetscErrorCode ierr; 1982 PetscInt s1,s2,s3; 1983 1984 PetscFunctionBegin; 1985 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1986 if (rr) { 1987 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1988 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1989 /* Overlap communication with computation. 
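       The scatter of rr into the ghosted work vector lvec is only started here; it is completed
       (VecScatterEnd) after the left scaling of the off-diagonal block and the scaling of the
       diagonal block below, so the communication overlaps that local work.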
*/ 1990 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1991 } 1992 if (ll) { 1993 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1994 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1995 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1996 } 1997 /* scale the diagonal block */ 1998 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1999 2000 if (rr) { 2001 /* Do a scatter end and then right scale the off-diagonal block */ 2002 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2003 ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr); 2004 } 2005 PetscFunctionReturn(0); 2006 } 2007 2008 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2009 { 2010 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2011 PetscErrorCode ierr; 2012 2013 PetscFunctionBegin; 2014 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2015 PetscFunctionReturn(0); 2016 } 2017 2018 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2019 { 2020 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2021 Mat a,b,c,d; 2022 PetscBool flg; 2023 PetscErrorCode ierr; 2024 2025 PetscFunctionBegin; 2026 a = matA->A; b = matA->B; 2027 c = matB->A; d = matB->B; 2028 2029 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2030 if (flg) { 2031 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2032 } 2033 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 2034 PetscFunctionReturn(0); 2035 } 2036 2037 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2038 { 2039 PetscErrorCode ierr; 2040 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2041 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2042 2043 PetscFunctionBegin; 2044 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2045 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2046 /* because of the column compression in the off-processor part of the matrix a->B, 2047 the number of columns in a->B and b->B may be different, hence we cannot call 2048 the MatCopy() directly on the two parts. If need be, we can provide a more 2049 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2050 then copying the submatrices */ 2051 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2052 } else { 2053 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2054 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2055 } 2056 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2057 PetscFunctionReturn(0); 2058 } 2059 2060 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2061 { 2062 PetscErrorCode ierr; 2063 2064 PetscFunctionBegin; 2065 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr); 2066 PetscFunctionReturn(0); 2067 } 2068 2069 /* 2070 Computes the number of nonzeros per row needed for preallocation when X and Y 2071 have different nonzero structure. 
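   Assuming the column lists of each row are sorted by global index (as they are for AIJ), the
   routine merges the two lists and counts the size of their union; e.g., X columns {0,3,5} and
   Y columns {3,4} in the same row give nnz = 4 for that row.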
2072 */ 2073 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2074 { 2075 PetscInt i,j,k,nzx,nzy; 2076 2077 PetscFunctionBegin; 2078 /* Set the number of nonzeros in the new matrix */ 2079 for (i=0; i<m; i++) { 2080 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2081 nzx = xi[i+1] - xi[i]; 2082 nzy = yi[i+1] - yi[i]; 2083 nnz[i] = 0; 2084 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2085 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2086 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2087 nnz[i]++; 2088 } 2089 for (; k<nzy; k++) nnz[i]++; 2090 } 2091 PetscFunctionReturn(0); 2092 } 2093 2094 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2095 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2096 { 2097 PetscErrorCode ierr; 2098 PetscInt m = Y->rmap->N; 2099 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2100 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2101 2102 PetscFunctionBegin; 2103 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2104 PetscFunctionReturn(0); 2105 } 2106 2107 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2108 { 2109 PetscErrorCode ierr; 2110 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2111 2112 PetscFunctionBegin; 2113 if (str == SAME_NONZERO_PATTERN) { 2114 ierr = MatAXPY(yy->A,a,xx->A,str);CHKERRQ(ierr); 2115 ierr = MatAXPY(yy->B,a,xx->B,str);CHKERRQ(ierr); 2116 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2117 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2118 } else { 2119 Mat B; 2120 PetscInt *nnz_d,*nnz_o; 2121 2122 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2123 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2124 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2125 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2126 ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr); 2127 ierr = MatSetType(B,((PetscObject)Y)->type_name);CHKERRQ(ierr); 2128 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2129 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2130 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2131 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2132 ierr = MatHeaderMerge(Y,&B);CHKERRQ(ierr); 2133 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2134 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2135 } 2136 PetscFunctionReturn(0); 2137 } 2138 2139 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2140 2141 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2142 { 2143 #if defined(PETSC_USE_COMPLEX) 2144 PetscErrorCode ierr; 2145 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2146 2147 PetscFunctionBegin; 2148 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2149 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2150 #else 2151 PetscFunctionBegin; 2152 #endif 2153 PetscFunctionReturn(0); 2154 } 2155 2156 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2157 { 2158 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2159 PetscErrorCode ierr; 2160 2161 PetscFunctionBegin; 2162 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2163 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2164 
PetscFunctionReturn(0); 2165 } 2166 2167 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2168 { 2169 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2170 PetscErrorCode ierr; 2171 2172 PetscFunctionBegin; 2173 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2174 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2175 PetscFunctionReturn(0); 2176 } 2177 2178 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2179 { 2180 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2181 PetscErrorCode ierr; 2182 PetscInt i,*idxb = NULL,m = A->rmap->n; 2183 PetscScalar *va,*vv; 2184 Vec vB,vA; 2185 const PetscScalar *vb; 2186 2187 PetscFunctionBegin; 2188 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr); 2189 ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr); 2190 2191 ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr); 2192 if (idx) { 2193 for (i=0; i<m; i++) { 2194 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2195 } 2196 } 2197 2198 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr); 2199 ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr); 2200 ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr); 2201 2202 ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr); 2203 ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr); 2204 for (i=0; i<m; i++) { 2205 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2206 vv[i] = vb[i]; 2207 if (idx) idx[i] = a->garray[idxb[i]]; 2208 } else { 2209 vv[i] = va[i]; 2210 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) 2211 idx[i] = a->garray[idxb[i]]; 2212 } 2213 } 2214 ierr = VecRestoreArrayWrite(vA,&vv);CHKERRQ(ierr); 2215 ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr); 2216 ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr); 2217 ierr = PetscFree(idxb);CHKERRQ(ierr); 2218 ierr = VecDestroy(&vA);CHKERRQ(ierr); 2219 ierr = VecDestroy(&vB);CHKERRQ(ierr); 2220 PetscFunctionReturn(0); 2221 } 2222 2223 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2224 { 2225 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2226 PetscInt m = A->rmap->n,n = A->cmap->n; 2227 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2228 PetscInt *cmap = mat->garray; 2229 PetscInt *diagIdx, *offdiagIdx; 2230 Vec diagV, offdiagV; 2231 PetscScalar *a, *diagA, *offdiagA; 2232 const PetscScalar *ba,*bav; 2233 PetscInt r,j,col,ncols,*bi,*bj; 2234 PetscErrorCode ierr; 2235 Mat B = mat->B; 2236 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2237 2238 PetscFunctionBegin; 2239 /* When a process holds entire A and other processes have no entry */ 2240 if (A->cmap->N == n) { 2241 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2242 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2243 ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr); 2244 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2245 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2246 PetscFunctionReturn(0); 2247 } else if (n == 0) { 2248 if (m) { 2249 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2250 for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;} 2251 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2252 } 2253 PetscFunctionReturn(0); 2254 } 2255 2256 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2257 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2258 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2259 ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2260 2261 /* Get offdiagIdx[] for implicit 0.0 */ 2262 ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr); 2263 ba = bav; 2264 bi = b->i; 2265 bj = b->j; 
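/* B stores only the nonzero (compressed) off-diagonal columns. If a row of B has fewer than
   A->cmap->N - n stored entries, it contains at least one implicit zero, so the smallest
   absolute value in the off-diagonal part of that row is 0.0; the loop below also locates the
   global column of the first such implicit zero so it can be reported in idx[]. */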
2266 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2267 for (r = 0; r < m; r++) { 2268 ncols = bi[r+1] - bi[r]; 2269 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2270 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2271 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2272 offdiagA[r] = 0.0; 2273 2274 /* Find first hole in the cmap */ 2275 for (j=0; j<ncols; j++) { 2276 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2277 if (col > j && j < cstart) { 2278 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2279 break; 2280 } else if (col > j + n && j >= cstart) { 2281 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2282 break; 2283 } 2284 } 2285 if (j == ncols && ncols < A->cmap->N - n) { 2286 /* a hole is outside compressed Bcols */ 2287 if (ncols == 0) { 2288 if (cstart) { 2289 offdiagIdx[r] = 0; 2290 } else offdiagIdx[r] = cend; 2291 } else { /* ncols > 0 */ 2292 offdiagIdx[r] = cmap[ncols-1] + 1; 2293 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2294 } 2295 } 2296 } 2297 2298 for (j=0; j<ncols; j++) { 2299 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2300 ba++; bj++; 2301 } 2302 } 2303 2304 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2305 ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2306 for (r = 0; r < m; ++r) { 2307 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2308 a[r] = diagA[r]; 2309 if (idx) idx[r] = cstart + diagIdx[r]; 2310 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2311 a[r] = diagA[r]; 2312 if (idx) { 2313 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2314 idx[r] = cstart + diagIdx[r]; 2315 } else idx[r] = offdiagIdx[r]; 2316 } 2317 } else { 2318 a[r] = offdiagA[r]; 2319 if (idx) idx[r] = offdiagIdx[r]; 2320 } 2321 } 2322 ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr); 2323 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2324 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2325 ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2326 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2327 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2328 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2329 PetscFunctionReturn(0); 2330 } 2331 2332 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2333 { 2334 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2335 PetscInt m = A->rmap->n,n = A->cmap->n; 2336 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2337 PetscInt *cmap = mat->garray; 2338 PetscInt *diagIdx, *offdiagIdx; 2339 Vec diagV, offdiagV; 2340 PetscScalar *a, *diagA, *offdiagA; 2341 const PetscScalar *ba,*bav; 2342 PetscInt r,j,col,ncols,*bi,*bj; 2343 PetscErrorCode ierr; 2344 Mat B = mat->B; 2345 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2346 2347 PetscFunctionBegin; 2348 /* When a process holds entire A and other processes have no entry */ 2349 if (A->cmap->N == n) { 2350 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2351 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2352 ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr); 2353 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2354 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2355 PetscFunctionReturn(0); 2356 } else if (n == 0) { 2357 if (m) { 2358 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2359 for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;} 2360 ierr = 
VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2361 } 2362 PetscFunctionReturn(0); 2363 } 2364 2365 ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2366 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2367 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2368 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2369 2370 /* Get offdiagIdx[] for implicit 0.0 */ 2371 ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr); 2372 ba = bav; 2373 bi = b->i; 2374 bj = b->j; 2375 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2376 for (r = 0; r < m; r++) { 2377 ncols = bi[r+1] - bi[r]; 2378 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2379 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2380 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2381 offdiagA[r] = 0.0; 2382 2383 /* Find first hole in the cmap */ 2384 for (j=0; j<ncols; j++) { 2385 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2386 if (col > j && j < cstart) { 2387 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2388 break; 2389 } else if (col > j + n && j >= cstart) { 2390 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2391 break; 2392 } 2393 } 2394 if (j == ncols && ncols < A->cmap->N - n) { 2395 /* a hole is outside compressed Bcols */ 2396 if (ncols == 0) { 2397 if (cstart) { 2398 offdiagIdx[r] = 0; 2399 } else offdiagIdx[r] = cend; 2400 } else { /* ncols > 0 */ 2401 offdiagIdx[r] = cmap[ncols-1] + 1; 2402 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2403 } 2404 } 2405 } 2406 2407 for (j=0; j<ncols; j++) { 2408 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2409 ba++; bj++; 2410 } 2411 } 2412 2413 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2414 ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2415 for (r = 0; r < m; ++r) { 2416 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2417 a[r] = diagA[r]; 2418 if (idx) idx[r] = cstart + diagIdx[r]; 2419 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2420 a[r] = diagA[r]; 2421 if (idx) { 2422 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2423 idx[r] = cstart + diagIdx[r]; 2424 } else idx[r] = offdiagIdx[r]; 2425 } 2426 } else { 2427 a[r] = offdiagA[r]; 2428 if (idx) idx[r] = offdiagIdx[r]; 2429 } 2430 } 2431 ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr); 2432 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2433 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2434 ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2435 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2436 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2437 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2438 PetscFunctionReturn(0); 2439 } 2440 2441 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2442 { 2443 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 2444 PetscInt m = A->rmap->n,n = A->cmap->n; 2445 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2446 PetscInt *cmap = mat->garray; 2447 PetscInt *diagIdx, *offdiagIdx; 2448 Vec diagV, offdiagV; 2449 PetscScalar *a, *diagA, *offdiagA; 2450 const PetscScalar *ba,*bav; 2451 PetscInt r,j,col,ncols,*bi,*bj; 2452 PetscErrorCode ierr; 2453 Mat B = mat->B; 2454 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2455 2456 PetscFunctionBegin; 2457 /* When a process holds entire A and other processes have no entry */ 2458 if (A->cmap->N == n) { 2459 ierr = 
VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2460 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2461 ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr); 2462 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2463 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2464 PetscFunctionReturn(0); 2465 } else if (n == 0) { 2466 if (m) { 2467 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2468 for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;} 2469 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2470 } 2471 PetscFunctionReturn(0); 2472 } 2473 2474 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2475 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2476 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2477 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2478 2479 /* Get offdiagIdx[] for implicit 0.0 */ 2480 ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr); 2481 ba = bav; 2482 bi = b->i; 2483 bj = b->j; 2484 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2485 for (r = 0; r < m; r++) { 2486 ncols = bi[r+1] - bi[r]; 2487 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2488 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2489 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2490 offdiagA[r] = 0.0; 2491 2492 /* Find first hole in the cmap */ 2493 for (j=0; j<ncols; j++) { 2494 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2495 if (col > j && j < cstart) { 2496 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2497 break; 2498 } else if (col > j + n && j >= cstart) { 2499 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2500 break; 2501 } 2502 } 2503 if (j == ncols && ncols < A->cmap->N - n) { 2504 /* a hole is outside compressed Bcols */ 2505 if (ncols == 0) { 2506 if (cstart) { 2507 offdiagIdx[r] = 0; 2508 } else offdiagIdx[r] = cend; 2509 } else { /* ncols > 0 */ 2510 offdiagIdx[r] = cmap[ncols-1] + 1; 2511 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2512 } 2513 } 2514 } 2515 2516 for (j=0; j<ncols; j++) { 2517 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2518 ba++; bj++; 2519 } 2520 } 2521 2522 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2523 ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr); 2524 for (r = 0; r < m; ++r) { 2525 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2526 a[r] = diagA[r]; 2527 if (idx) idx[r] = cstart + diagIdx[r]; 2528 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2529 a[r] = diagA[r]; 2530 if (idx) { 2531 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2532 idx[r] = cstart + diagIdx[r]; 2533 } else idx[r] = offdiagIdx[r]; 2534 } 2535 } else { 2536 a[r] = offdiagA[r]; 2537 if (idx) idx[r] = offdiagIdx[r]; 2538 } 2539 } 2540 ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr); 2541 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2542 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2543 ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr); 2544 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2545 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2546 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2547 PetscFunctionReturn(0); 2548 } 2549 2550 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2551 { 2552 PetscErrorCode ierr; 2553 Mat *dummy; 2554 2555 PetscFunctionBegin; 2556 ierr = 
MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2557 *newmat = *dummy; 2558 ierr = PetscFree(dummy);CHKERRQ(ierr); 2559 PetscFunctionReturn(0); 2560 } 2561 2562 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2563 { 2564 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2565 PetscErrorCode ierr; 2566 2567 PetscFunctionBegin; 2568 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2569 A->factorerrortype = a->A->factorerrortype; 2570 PetscFunctionReturn(0); 2571 } 2572 2573 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2574 { 2575 PetscErrorCode ierr; 2576 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2577 2578 PetscFunctionBegin; 2579 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2580 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2581 if (x->assembled) { 2582 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2583 } else { 2584 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2585 } 2586 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2587 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2588 PetscFunctionReturn(0); 2589 } 2590 2591 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2592 { 2593 PetscFunctionBegin; 2594 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2595 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2596 PetscFunctionReturn(0); 2597 } 2598 2599 /*@ 2600 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2601 2602 Collective on Mat 2603 2604 Input Parameters: 2605 + A - the matrix 2606 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2607 2608 Level: advanced 2609 2610 @*/ 2611 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2612 { 2613 PetscErrorCode ierr; 2614 2615 PetscFunctionBegin; 2616 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2617 PetscFunctionReturn(0); 2618 } 2619 2620 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2621 { 2622 PetscErrorCode ierr; 2623 PetscBool sc = PETSC_FALSE,flg; 2624 2625 PetscFunctionBegin; 2626 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2627 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2628 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2629 if (flg) { 2630 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2631 } 2632 ierr = PetscOptionsTail();CHKERRQ(ierr); 2633 PetscFunctionReturn(0); 2634 } 2635 2636 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2637 { 2638 PetscErrorCode ierr; 2639 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2640 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2641 2642 PetscFunctionBegin; 2643 if (!Y->preallocated) { 2644 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2645 } else if (!aij->nz) { 2646 PetscInt nonew = aij->nonew; 2647 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2648 aij->nonew = nonew; 2649 } 2650 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2651 
PetscFunctionReturn(0); 2652 } 2653 2654 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2655 { 2656 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2657 PetscErrorCode ierr; 2658 2659 PetscFunctionBegin; 2660 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2661 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2662 if (d) { 2663 PetscInt rstart; 2664 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2665 *d += rstart; 2666 2667 } 2668 PetscFunctionReturn(0); 2669 } 2670 2671 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2672 { 2673 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2674 PetscErrorCode ierr; 2675 2676 PetscFunctionBegin; 2677 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2678 PetscFunctionReturn(0); 2679 } 2680 2681 /* -------------------------------------------------------------------*/ 2682 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2683 MatGetRow_MPIAIJ, 2684 MatRestoreRow_MPIAIJ, 2685 MatMult_MPIAIJ, 2686 /* 4*/ MatMultAdd_MPIAIJ, 2687 MatMultTranspose_MPIAIJ, 2688 MatMultTransposeAdd_MPIAIJ, 2689 NULL, 2690 NULL, 2691 NULL, 2692 /*10*/ NULL, 2693 NULL, 2694 NULL, 2695 MatSOR_MPIAIJ, 2696 MatTranspose_MPIAIJ, 2697 /*15*/ MatGetInfo_MPIAIJ, 2698 MatEqual_MPIAIJ, 2699 MatGetDiagonal_MPIAIJ, 2700 MatDiagonalScale_MPIAIJ, 2701 MatNorm_MPIAIJ, 2702 /*20*/ MatAssemblyBegin_MPIAIJ, 2703 MatAssemblyEnd_MPIAIJ, 2704 MatSetOption_MPIAIJ, 2705 MatZeroEntries_MPIAIJ, 2706 /*24*/ MatZeroRows_MPIAIJ, 2707 NULL, 2708 NULL, 2709 NULL, 2710 NULL, 2711 /*29*/ MatSetUp_MPIAIJ, 2712 NULL, 2713 NULL, 2714 MatGetDiagonalBlock_MPIAIJ, 2715 NULL, 2716 /*34*/ MatDuplicate_MPIAIJ, 2717 NULL, 2718 NULL, 2719 NULL, 2720 NULL, 2721 /*39*/ MatAXPY_MPIAIJ, 2722 MatCreateSubMatrices_MPIAIJ, 2723 MatIncreaseOverlap_MPIAIJ, 2724 MatGetValues_MPIAIJ, 2725 MatCopy_MPIAIJ, 2726 /*44*/ MatGetRowMax_MPIAIJ, 2727 MatScale_MPIAIJ, 2728 MatShift_MPIAIJ, 2729 MatDiagonalSet_MPIAIJ, 2730 MatZeroRowsColumns_MPIAIJ, 2731 /*49*/ MatSetRandom_MPIAIJ, 2732 NULL, 2733 NULL, 2734 NULL, 2735 NULL, 2736 /*54*/ MatFDColoringCreate_MPIXAIJ, 2737 NULL, 2738 MatSetUnfactored_MPIAIJ, 2739 MatPermute_MPIAIJ, 2740 NULL, 2741 /*59*/ MatCreateSubMatrix_MPIAIJ, 2742 MatDestroy_MPIAIJ, 2743 MatView_MPIAIJ, 2744 NULL, 2745 NULL, 2746 /*64*/ NULL, 2747 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2748 NULL, 2749 NULL, 2750 NULL, 2751 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2752 MatGetRowMinAbs_MPIAIJ, 2753 NULL, 2754 NULL, 2755 NULL, 2756 NULL, 2757 /*75*/ MatFDColoringApply_AIJ, 2758 MatSetFromOptions_MPIAIJ, 2759 NULL, 2760 NULL, 2761 MatFindZeroDiagonals_MPIAIJ, 2762 /*80*/ NULL, 2763 NULL, 2764 NULL, 2765 /*83*/ MatLoad_MPIAIJ, 2766 MatIsSymmetric_MPIAIJ, 2767 NULL, 2768 NULL, 2769 NULL, 2770 NULL, 2771 /*89*/ NULL, 2772 NULL, 2773 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2774 NULL, 2775 NULL, 2776 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2777 NULL, 2778 NULL, 2779 NULL, 2780 MatBindToCPU_MPIAIJ, 2781 /*99*/ MatProductSetFromOptions_MPIAIJ, 2782 NULL, 2783 NULL, 2784 MatConjugate_MPIAIJ, 2785 NULL, 2786 /*104*/MatSetValuesRow_MPIAIJ, 2787 MatRealPart_MPIAIJ, 2788 MatImaginaryPart_MPIAIJ, 2789 NULL, 2790 NULL, 2791 /*109*/NULL, 2792 NULL, 2793 MatGetRowMin_MPIAIJ, 2794 NULL, 2795 MatMissingDiagonal_MPIAIJ, 2796 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2797 NULL, 2798 MatGetGhosts_MPIAIJ, 2799 NULL, 2800 NULL, 2801 /*119*/MatMultDiagonalBlock_MPIAIJ, 2802 
NULL, 2803 NULL, 2804 NULL, 2805 MatGetMultiProcBlock_MPIAIJ, 2806 /*124*/MatFindNonzeroRows_MPIAIJ, 2807 MatGetColumnReductions_MPIAIJ, 2808 MatInvertBlockDiagonal_MPIAIJ, 2809 MatInvertVariableBlockDiagonal_MPIAIJ, 2810 MatCreateSubMatricesMPI_MPIAIJ, 2811 /*129*/NULL, 2812 NULL, 2813 NULL, 2814 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2815 NULL, 2816 /*134*/NULL, 2817 NULL, 2818 NULL, 2819 NULL, 2820 NULL, 2821 /*139*/MatSetBlockSizes_MPIAIJ, 2822 NULL, 2823 NULL, 2824 MatFDColoringSetUp_MPIXAIJ, 2825 MatFindOffBlockDiagonalEntries_MPIAIJ, 2826 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2827 /*145*/NULL, 2828 NULL, 2829 NULL 2830 }; 2831 2832 /* ----------------------------------------------------------------------------------------*/ 2833 2834 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2835 { 2836 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2837 PetscErrorCode ierr; 2838 2839 PetscFunctionBegin; 2840 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2841 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2842 PetscFunctionReturn(0); 2843 } 2844 2845 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2846 { 2847 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2848 PetscErrorCode ierr; 2849 2850 PetscFunctionBegin; 2851 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2852 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2853 PetscFunctionReturn(0); 2854 } 2855 2856 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2857 { 2858 Mat_MPIAIJ *b; 2859 PetscErrorCode ierr; 2860 PetscMPIInt size; 2861 2862 PetscFunctionBegin; 2863 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2864 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2865 b = (Mat_MPIAIJ*)B->data; 2866 2867 #if defined(PETSC_USE_CTABLE) 2868 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2869 #else 2870 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2871 #endif 2872 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2873 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2874 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2875 2876 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2877 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 2878 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2879 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2880 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 2881 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2882 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2883 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2884 2885 if (!B->preallocated) { 2886 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2887 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2888 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2889 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2890 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2891 } 2892 2893 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2894 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2895 B->preallocated = PETSC_TRUE; 2896 B->was_assembled = PETSC_FALSE; 2897 B->assembled = PETSC_FALSE; 2898 PetscFunctionReturn(0); 2899 } 2900 2901 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2902 { 2903 Mat_MPIAIJ *b; 2904 PetscErrorCode ierr; 2905 2906 PetscFunctionBegin; 2907 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2908 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2909 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2910 b = (Mat_MPIAIJ*)B->data; 2911 2912 #if defined(PETSC_USE_CTABLE) 2913 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2914 #else 2915 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2916 #endif 2917 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2918 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2919 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2920 2921 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2922 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2923 B->preallocated = PETSC_TRUE; 2924 B->was_assembled = PETSC_FALSE; 2925 B->assembled = PETSC_FALSE; 2926 PetscFunctionReturn(0); 2927 } 2928 2929 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2930 { 2931 Mat mat; 2932 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2933 PetscErrorCode ierr; 2934 2935 PetscFunctionBegin; 2936 *newmat = NULL; 2937 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2938 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2939 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2940 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2941 a = (Mat_MPIAIJ*)mat->data; 2942 2943 mat->factortype = matin->factortype; 2944 mat->assembled = matin->assembled; 2945 mat->insertmode = NOT_SET_VALUES; 2946 mat->preallocated = matin->preallocated; 2947 2948 a->size = oldmat->size; 2949 a->rank = oldmat->rank; 2950 a->donotstash = oldmat->donotstash; 2951 a->roworiented = oldmat->roworiented; 2952 a->rowindices = NULL; 2953 a->rowvalues = NULL; 2954 a->getrowactive = PETSC_FALSE; 2955 2956 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2957 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2958 2959 if (oldmat->colmap) { 2960 #if defined(PETSC_USE_CTABLE) 2961 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2962 #else 2963 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2964 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2965 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 2966 #endif 2967 } else a->colmap = NULL; 2968 if (oldmat->garray) { 2969 PetscInt len; 2970 len = oldmat->B->cmap->n; 2971 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2972 
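/* garray[] maps the compressed column indices of the off-diagonal block B to global column
   numbers; the duplicate gets its own copy so the two matrices remain independent. */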
ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2973 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 2974 } else a->garray = NULL; 2975 2976 /* It may happen MatDuplicate is called with a non-assembled matrix 2977 In fact, MatDuplicate only requires the matrix to be preallocated 2978 This may happen inside a DMCreateMatrix_Shell */ 2979 if (oldmat->lvec) { 2980 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2981 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2982 } 2983 if (oldmat->Mvctx) { 2984 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2985 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2986 } 2987 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2988 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2989 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2990 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2991 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2992 *newmat = mat; 2993 PetscFunctionReturn(0); 2994 } 2995 2996 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2997 { 2998 PetscBool isbinary, ishdf5; 2999 PetscErrorCode ierr; 3000 3001 PetscFunctionBegin; 3002 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 3003 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 3004 /* force binary viewer to load .info file if it has not yet done so */ 3005 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3006 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 3007 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 3008 if (isbinary) { 3009 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 3010 } else if (ishdf5) { 3011 #if defined(PETSC_HAVE_HDF5) 3012 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 3013 #else 3014 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3015 #endif 3016 } else { 3017 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 3018 } 3019 PetscFunctionReturn(0); 3020 } 3021 3022 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3023 { 3024 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 3025 PetscInt *rowidxs,*colidxs; 3026 PetscScalar *matvals; 3027 PetscErrorCode ierr; 3028 3029 PetscFunctionBegin; 3030 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3031 3032 /* read in matrix header */ 3033 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3034 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3035 M = header[1]; N = header[2]; nz = header[3]; 3036 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 3037 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N); 3038 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 3039 3040 /* set block sizes from the viewer's .info file */ 3041 ierr = 
MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3042 /* set global sizes if not set already */ 3043 if (mat->rmap->N < 0) mat->rmap->N = M; 3044 if (mat->cmap->N < 0) mat->cmap->N = N; 3045 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3046 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3047 3048 /* check if the matrix sizes are correct */ 3049 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3050 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 3051 3052 /* read in row lengths and build row indices */ 3053 ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr); 3054 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3055 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 3056 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3057 ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr); 3058 if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 3059 /* read in column indices and matrix values */ 3060 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 3061 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 3062 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 3063 /* store matrix indices and values */ 3064 ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr); 3065 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3066 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3067 PetscFunctionReturn(0); 3068 } 3069 3070 /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 3071 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3072 { 3073 PetscErrorCode ierr; 3074 IS iscol_local; 3075 PetscBool isstride; 3076 PetscMPIInt lisstride=0,gisstride; 3077 3078 PetscFunctionBegin; 3079 /* check if we are grabbing all columns*/ 3080 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3081 3082 if (isstride) { 3083 PetscInt start,len,mstart,mlen; 3084 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3085 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3086 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3087 if (mstart == start && mlen-mstart == len) lisstride = 1; 3088 } 3089 3090 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 3091 if (gisstride) { 3092 PetscInt N; 3093 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3094 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr); 3095 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3096 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3097 } else { 3098 PetscInt cbs; 3099 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3100 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3101 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3102 } 3103 3104 *isseq = iscol_local; 3105 PetscFunctionReturn(0); 3106 } 3107 3108 /* 3109 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3110 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3111 3112 Input Parameters: 3113 mat - matrix 3114 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3115 i.e., mat->rstart <= isrow[i] < mat->rend 3116 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3117 i.e., mat->cstart <= iscol[i] < mat->cend 3118 Output Parameter: 3119 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3120 iscol_o - sequential column index set for retrieving mat->B 3121 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3122 */ 3123 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3124 { 3125 PetscErrorCode ierr; 3126 Vec x,cmap; 3127 const PetscInt *is_idx; 3128 PetscScalar *xarray,*cmaparray; 3129 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3130 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3131 Mat B=a->B; 3132 Vec lvec=a->lvec,lcmap; 3133 PetscInt i,cstart,cend,Bn=B->cmap->N; 3134 MPI_Comm comm; 3135 VecScatter Mvctx=a->Mvctx; 3136 3137 PetscFunctionBegin; 3138 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3139 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3140 3141 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3142 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3143 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3144 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3145 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3146 3147 /* Get start indices */ 3148 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3149 isstart -= ncols; 3150 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3151 3152 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3153 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3154 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3155 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3156 for (i=0; i<ncols; i++) { 3157 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3158 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3159 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3160 } 3161 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3162 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3163 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3164 3165 /* Get iscol_d */ 3166 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3167 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3168 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3169 3170 /* Get isrow_d */ 3171 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3172 rstart = mat->rmap->rstart; 3173 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3174 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3175 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3176 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3177 3178 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3179 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3180 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3181 3182 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3183 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3184 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3185 3186 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3187 3188 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3189 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3190 3191 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3192 /* off-process column indices */ 3193 count = 0; 3194 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3195 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3196 3197 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3198 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3199 for (i=0; i<Bn; i++) { 3200 if (PetscRealPart(xarray[i]) > -1.0) { 3201 idx[count] = i; /* local column index in off-diagonal part B */ 3202 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3203 count++; 3204 } 3205 } 3206 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3207 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3208 3209 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3210 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3211 3212 ierr = PetscFree(idx);CHKERRQ(ierr); 3213 *garray = cmap1; 3214 3215 ierr = VecDestroy(&x);CHKERRQ(ierr); 3216 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3217 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3218 PetscFunctionReturn(0); 3219 } 3220 3221 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3222 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3223 { 3224 PetscErrorCode ierr; 3225 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3226 Mat M = NULL; 3227 MPI_Comm comm; 3228 IS iscol_d,isrow_d,iscol_o; 3229 Mat Asub = NULL,Bsub = NULL; 3230 PetscInt n; 3231 3232 PetscFunctionBegin; 3233 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3234 3235 if (call == MAT_REUSE_MATRIX) { 3236 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3237 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3238 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3239 3240 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3241 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3242 3243 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3244 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3245 3246 /* Update diagonal and off-diagonal portions of submat */ 3247 asub = (Mat_MPIAIJ*)(*submat)->data; 3248 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3249 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3250 if (n) { 3251 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3252 } 3253 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3254 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3255 3256 } else { /* call == MAT_INITIAL_MATRIX) */ 3257 const PetscInt *garray; 3258 PetscInt BsubN; 3259 3260 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3261 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3262 3263 /* Create local submatrices Asub and Bsub */ 3264 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3265 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3266 3267 /* Create submatrix M */ 3268 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3269 3270 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3271 asub = (Mat_MPIAIJ*)M->data; 3272 3273 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3274 n = asub->B->cmap->N; 3275 if (BsubN > n) { 3276 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3277 const PetscInt *idx; 3278 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3279 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3280 3281 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3282 j = 0; 3283 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3284 for (i=0; i<n; i++) { 3285 if (j >= BsubN) break; 3286 while (subgarray[i] > garray[j]) j++; 3287 3288 if (subgarray[i] == garray[j]) { 3289 idx_new[i] = idx[j++]; 3290 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3291 } 3292 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3293 3294 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3295 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3296 3297 } else if (BsubN < n) { 3298 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3299 } 3300 3301 ierr = PetscFree(garray);CHKERRQ(ierr); 3302 *submat = M; 3303 3304 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3305 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3306 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3307 3308 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3309 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3310 3311 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3312 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3313 } 3314 PetscFunctionReturn(0); 3315 } 3316 3317 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3318 { 3319 PetscErrorCode ierr; 3320 IS iscol_local=NULL,isrow_d; 3321 PetscInt csize; 3322 PetscInt n,i,j,start,end; 3323 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3324 MPI_Comm comm; 3325 3326 PetscFunctionBegin; 3327 /* If isrow has same processor distribution as mat, 3328 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3329 if (call == MAT_REUSE_MATRIX) { 3330 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3331 if (isrow_d) { 3332 sameRowDist = PETSC_TRUE; 3333 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3334 } else { 3335 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3336 if (iscol_local) { 3337 sameRowDist = PETSC_TRUE; 3338 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3339 } 3340 } 3341 } else { 3342 /* Check if isrow has same processor distribution as mat */ 3343 sameDist[0] = 
PETSC_FALSE; 3344 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3345 if (!n) { 3346 sameDist[0] = PETSC_TRUE; 3347 } else { 3348 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3349 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3350 if (i >= start && j < end) { 3351 sameDist[0] = PETSC_TRUE; 3352 } 3353 } 3354 3355 /* Check if iscol has same processor distribution as mat */ 3356 sameDist[1] = PETSC_FALSE; 3357 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3358 if (!n) { 3359 sameDist[1] = PETSC_TRUE; 3360 } else { 3361 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3362 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3363 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3364 } 3365 3366 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3367 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRMPI(ierr); 3368 sameRowDist = tsameDist[0]; 3369 } 3370 3371 if (sameRowDist) { 3372 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3373 /* isrow and iscol have same processor distribution as mat */ 3374 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3375 PetscFunctionReturn(0); 3376 } else { /* sameRowDist */ 3377 /* isrow has same processor distribution as mat */ 3378 if (call == MAT_INITIAL_MATRIX) { 3379 PetscBool sorted; 3380 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3381 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3382 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3383 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3384 3385 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3386 if (sorted) { 3387 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3388 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3389 PetscFunctionReturn(0); 3390 } 3391 } else { /* call == MAT_REUSE_MATRIX */ 3392 IS iscol_sub; 3393 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3394 if (iscol_sub) { 3395 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3396 PetscFunctionReturn(0); 3397 } 3398 } 3399 } 3400 } 3401 3402 /* General case: iscol -> iscol_local which has global size of iscol */ 3403 if (call == MAT_REUSE_MATRIX) { 3404 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3405 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3406 } else { 3407 if (!iscol_local) { 3408 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3409 } 3410 } 3411 3412 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3413 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3414 3415 if (call == MAT_INITIAL_MATRIX) { 3416 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3417 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3418 } 3419 PetscFunctionReturn(0); 3420 } 3421 3422 /*@C 3423 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3424 and "off-diagonal" part of the matrix in CSR format. 3425 3426 Collective 3427 3428 Input Parameters: 3429 + comm - MPI communicator 3430 . 
A - "diagonal" portion of matrix 3431 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3432 - garray - global index of B columns 3433 3434 Output Parameter: 3435 . mat - the matrix, with input A as its local diagonal matrix 3436 Level: advanced 3437 3438 Notes: 3439 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3440 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3441 3442 .seealso: MatCreateMPIAIJWithSplitArrays() 3443 @*/ 3444 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3445 { 3446 PetscErrorCode ierr; 3447 Mat_MPIAIJ *maij; 3448 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3449 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3450 const PetscScalar *oa; 3451 Mat Bnew; 3452 PetscInt m,n,N; 3453 3454 PetscFunctionBegin; 3455 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3456 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3457 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3458 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3459 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3460 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3461 3462 /* Get global columns of mat */ 3463 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3464 3465 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3466 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3467 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3468 maij = (Mat_MPIAIJ*)(*mat)->data; 3469 3470 (*mat)->preallocated = PETSC_TRUE; 3471 3472 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3473 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3474 3475 /* Set A as diagonal portion of *mat */ 3476 maij->A = A; 3477 3478 nz = oi[m]; 3479 for (i=0; i<nz; i++) { 3480 col = oj[i]; 3481 oj[i] = garray[col]; 3482 } 3483 3484 /* Set Bnew as off-diagonal portion of *mat */ 3485 ierr = MatSeqAIJGetArrayRead(B,&oa);CHKERRQ(ierr); 3486 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew);CHKERRQ(ierr); 3487 ierr = MatSeqAIJRestoreArrayRead(B,&oa);CHKERRQ(ierr); 3488 bnew = (Mat_SeqAIJ*)Bnew->data; 3489 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3490 maij->B = Bnew; 3491 3492 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3493 3494 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3495 b->free_a = PETSC_FALSE; 3496 b->free_ij = PETSC_FALSE; 3497 ierr = MatDestroy(&B);CHKERRQ(ierr); 3498 3499 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3500 bnew->free_a = PETSC_TRUE; 3501 bnew->free_ij = PETSC_TRUE; 3502 3503 /* condense columns of maij->B */ 3504 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3505 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3506 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3507 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3508 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3509 PetscFunctionReturn(0); 
3510 } 3511 3512 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3513 3514 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3515 { 3516 PetscErrorCode ierr; 3517 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3518 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3519 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3520 Mat M,Msub,B=a->B; 3521 MatScalar *aa; 3522 Mat_SeqAIJ *aij; 3523 PetscInt *garray = a->garray,*colsub,Ncols; 3524 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3525 IS iscol_sub,iscmap; 3526 const PetscInt *is_idx,*cmap; 3527 PetscBool allcolumns=PETSC_FALSE; 3528 MPI_Comm comm; 3529 3530 PetscFunctionBegin; 3531 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3532 if (call == MAT_REUSE_MATRIX) { 3533 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3534 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3535 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3536 3537 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3538 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3539 3540 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3541 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3542 3543 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3544 3545 } else { /* call == MAT_INITIAL_MATRIX) */ 3546 PetscBool flg; 3547 3548 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3549 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3550 3551 /* (1) iscol -> nonscalable iscol_local */ 3552 /* Check for special case: each processor gets entire matrix columns */ 3553 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3554 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3555 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 3556 if (allcolumns) { 3557 iscol_sub = iscol_local; 3558 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3559 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3560 3561 } else { 3562 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3563 PetscInt *idx,*cmap1,k; 3564 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3565 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3566 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3567 count = 0; 3568 k = 0; 3569 for (i=0; i<Ncols; i++) { 3570 j = is_idx[i]; 3571 if (j >= cstart && j < cend) { 3572 /* diagonal part of mat */ 3573 idx[count] = j; 3574 cmap1[count++] = i; /* column index in submat */ 3575 } else if (Bn) { 3576 /* off-diagonal part of mat */ 3577 if (j == garray[k]) { 3578 idx[count] = j; 3579 cmap1[count++] = i; /* column index in submat */ 3580 } else if (j > garray[k]) { 3581 while (j > garray[k] && k < Bn-1) k++; 3582 if (j == garray[k]) { 3583 idx[count] = j; 3584 cmap1[count++] = i; /* column index in submat */ 3585 } 3586 } 3587 } 3588 } 3589 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3590 3591 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3592 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3593 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3594 3595 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3596 } 3597 3598 /* (3) Create sequential Msub */ 3599 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3600 } 3601 3602 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3603 aij = (Mat_SeqAIJ*)(Msub)->data; 3604 ii = aij->i; 3605 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3606 3607 /* 3608 m - number of local rows 3609 Ncols - number of columns (same on all processors) 3610 rstart - first row in new global matrix generated 3611 */ 3612 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3613 3614 if (call == MAT_INITIAL_MATRIX) { 3615 /* (4) Create parallel newmat */ 3616 PetscMPIInt rank,size; 3617 PetscInt csize; 3618 3619 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 3620 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 3621 3622 /* 3623 Determine the number of non-zeros in the diagonal and off-diagonal 3624 portions of the matrix in order to do correct preallocation 3625 */ 3626 3627 /* first get start and end of "diagonal" columns */ 3628 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3629 if (csize == PETSC_DECIDE) { 3630 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3631 if (mglobal == Ncols) { /* square matrix */ 3632 nlocal = m; 3633 } else { 3634 nlocal = Ncols/size + ((Ncols % size) > rank); 3635 } 3636 } else { 3637 nlocal = csize; 3638 } 3639 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3640 rstart = rend - nlocal; 3641 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3642 3643 /* next, compute all the lengths */ 3644 jj = aij->j; 3645 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3646 olens = dlens + m; 3647 for (i=0; i<m; i++) { 3648 jend = ii[i+1] - ii[i]; 3649 olen = 0; 3650 dlen = 0; 3651 for (j=0; j<jend; j++) { 3652 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3653 else dlen++; 3654 jj++; 3655 } 3656 olens[i] = olen; 3657 dlens[i] = dlen; 3658 } 3659 3660 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3661 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3662 3663 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3664 ierr = 
MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 3665 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3666 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3667 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3668 ierr = PetscFree(dlens);CHKERRQ(ierr); 3669 3670 } else { /* call == MAT_REUSE_MATRIX */ 3671 M = *newmat; 3672 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3673 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3674 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3675 /* 3676 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3677 rather than the slower MatSetValues(). 3678 */ 3679 M->was_assembled = PETSC_TRUE; 3680 M->assembled = PETSC_FALSE; 3681 } 3682 3683 /* (5) Set values of Msub to *newmat */ 3684 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3685 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3686 3687 jj = aij->j; 3688 ierr = MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr); 3689 for (i=0; i<m; i++) { 3690 row = rstart + i; 3691 nz = ii[i+1] - ii[i]; 3692 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3693 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3694 jj += nz; aa += nz; 3695 } 3696 ierr = MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr); 3697 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3698 3699 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3700 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3701 3702 ierr = PetscFree(colsub);CHKERRQ(ierr); 3703 3704 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3705 if (call == MAT_INITIAL_MATRIX) { 3706 *newmat = M; 3707 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3708 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3709 3710 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3711 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3712 3713 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3714 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3715 3716 if (iscol_local) { 3717 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3718 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3719 } 3720 } 3721 PetscFunctionReturn(0); 3722 } 3723 3724 /* 3725 Not great since it makes two copies of the submatrix, first an SeqAIJ 3726 in local and then by concatenating the local matrices the end result. 3727 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3728 3729 Note: This requires a sequential iscol with all indices. 
3730 */ 3731 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3732 { 3733 PetscErrorCode ierr; 3734 PetscMPIInt rank,size; 3735 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3736 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3737 Mat M,Mreuse; 3738 MatScalar *aa,*vwork; 3739 MPI_Comm comm; 3740 Mat_SeqAIJ *aij; 3741 PetscBool colflag,allcolumns=PETSC_FALSE; 3742 3743 PetscFunctionBegin; 3744 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3745 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 3746 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 3747 3748 /* Check for special case: each processor gets entire matrix columns */ 3749 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3750 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3751 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3752 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 3753 3754 if (call == MAT_REUSE_MATRIX) { 3755 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3756 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3757 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3758 } else { 3759 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3760 } 3761 3762 /* 3763 m - number of local rows 3764 n - number of columns (same on all processors) 3765 rstart - first row in new global matrix generated 3766 */ 3767 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3768 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3769 if (call == MAT_INITIAL_MATRIX) { 3770 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3771 ii = aij->i; 3772 jj = aij->j; 3773 3774 /* 3775 Determine the number of non-zeros in the diagonal and off-diagonal 3776 portions of the matrix in order to do correct preallocation 3777 */ 3778 3779 /* first get start and end of "diagonal" columns */ 3780 if (csize == PETSC_DECIDE) { 3781 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3782 if (mglobal == n) { /* square matrix */ 3783 nlocal = m; 3784 } else { 3785 nlocal = n/size + ((n % size) > rank); 3786 } 3787 } else { 3788 nlocal = csize; 3789 } 3790 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3791 rstart = rend - nlocal; 3792 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3793 3794 /* next, compute all the lengths */ 3795 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3796 olens = dlens + m; 3797 for (i=0; i<m; i++) { 3798 jend = ii[i+1] - ii[i]; 3799 olen = 0; 3800 dlen = 0; 3801 for (j=0; j<jend; j++) { 3802 if (*jj < rstart || *jj >= rend) olen++; 3803 else dlen++; 3804 jj++; 3805 } 3806 olens[i] = olen; 3807 dlens[i] = dlen; 3808 } 3809 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3810 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3811 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3812 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3813 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3814 ierr = PetscFree(dlens);CHKERRQ(ierr); 3815 } else { 3816 PetscInt ml,nl; 3817 3818 M = *newmat; 3819 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3820 
if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3821 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3822 /* 3823 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3824 rather than the slower MatSetValues(). 3825 */ 3826 M->was_assembled = PETSC_TRUE; 3827 M->assembled = PETSC_FALSE; 3828 } 3829 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3830 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3831 ii = aij->i; 3832 jj = aij->j; 3833 3834 /* trigger copy to CPU if needed */ 3835 ierr = MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr); 3836 for (i=0; i<m; i++) { 3837 row = rstart + i; 3838 nz = ii[i+1] - ii[i]; 3839 cwork = jj; jj += nz; 3840 vwork = aa; aa += nz; 3841 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3842 } 3843 ierr = MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr); 3844 3845 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3846 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3847 *newmat = M; 3848 3849 /* save submatrix used in processor for next request */ 3850 if (call == MAT_INITIAL_MATRIX) { 3851 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3852 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3853 } 3854 PetscFunctionReturn(0); 3855 } 3856 3857 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3858 { 3859 PetscInt m,cstart, cend,j,nnz,i,d; 3860 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3861 const PetscInt *JJ; 3862 PetscErrorCode ierr; 3863 PetscBool nooffprocentries; 3864 3865 PetscFunctionBegin; 3866 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3867 3868 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3869 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3870 m = B->rmap->n; 3871 cstart = B->cmap->rstart; 3872 cend = B->cmap->rend; 3873 rstart = B->rmap->rstart; 3874 3875 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3876 3877 if (PetscDefined(USE_DEBUG)) { 3878 for (i=0; i<m; i++) { 3879 nnz = Ii[i+1]- Ii[i]; 3880 JJ = J + Ii[i]; 3881 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3882 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3883 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3884 } 3885 } 3886 3887 for (i=0; i<m; i++) { 3888 nnz = Ii[i+1]- Ii[i]; 3889 JJ = J + Ii[i]; 3890 nnz_max = PetscMax(nnz_max,nnz); 3891 d = 0; 3892 for (j=0; j<nnz; j++) { 3893 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3894 } 3895 d_nnz[i] = d; 3896 o_nnz[i] = nnz - d; 3897 } 3898 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3899 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3900 3901 for (i=0; i<m; i++) { 3902 ii = i + rstart; 3903 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? 
v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 3904 } 3905 nooffprocentries = B->nooffprocentries; 3906 B->nooffprocentries = PETSC_TRUE; 3907 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3908 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3909 B->nooffprocentries = nooffprocentries; 3910 3911 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3912 PetscFunctionReturn(0); 3913 } 3914 3915 /*@ 3916 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3917 (the default parallel PETSc format). 3918 3919 Collective 3920 3921 Input Parameters: 3922 + B - the matrix 3923 . i - the indices into j for the start of each local row (starts with zero) 3924 . j - the column indices for each local row (starts with zero) 3925 - v - optional values in the matrix 3926 3927 Level: developer 3928 3929 Notes: 3930 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3931 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3932 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3933 3934 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3935 3936 The format which is used for the sparse matrix input, is equivalent to a 3937 row-major ordering.. i.e for the following matrix, the input data expected is 3938 as shown 3939 3940 $ 1 0 0 3941 $ 2 0 3 P0 3942 $ ------- 3943 $ 4 5 6 P1 3944 $ 3945 $ Process0 [P0]: rows_owned=[0,1] 3946 $ i = {0,1,3} [size = nrow+1 = 2+1] 3947 $ j = {0,0,2} [size = 3] 3948 $ v = {1,2,3} [size = 3] 3949 $ 3950 $ Process1 [P1]: rows_owned=[2] 3951 $ i = {0,3} [size = nrow+1 = 1+1] 3952 $ j = {0,1,2} [size = 3] 3953 $ v = {4,5,6} [size = 3] 3954 3955 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3956 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3957 @*/ 3958 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3959 { 3960 PetscErrorCode ierr; 3961 3962 PetscFunctionBegin; 3963 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3964 PetscFunctionReturn(0); 3965 } 3966 3967 /*@C 3968 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3969 (the default parallel PETSc format). For good matrix assembly performance 3970 the user should preallocate the matrix storage by setting the parameters 3971 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3972 performance can be increased by more than a factor of 50. 3973 3974 Collective 3975 3976 Input Parameters: 3977 + B - the matrix 3978 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3979 (same value is used for all local rows) 3980 . d_nnz - array containing the number of nonzeros in the various rows of the 3981 DIAGONAL portion of the local submatrix (possibly different for each row) 3982 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3983 The size of this array is equal to the number of local rows, i.e 'm'. 3984 For matrices that will be factored, you must leave room for (and set) 3985 the diagonal entry even if it is zero. 3986 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
          OFF-DIAGONAL portion of the local submatrix (possibly different for
          each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
          structure. The size of this array is equal to the number
          of local rows, i.e., 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)) is fully compatible with standard Fortran 77
   storage. The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to. This is an mxn matrix. In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example, the fields mallocs, nz_allocated, nz_used, nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices.
   For example, proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
   in standard CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and the i indices are indices corresponding to the local j array.

   The format used for the sparse matrix input is equivalent to a
   row-major ordering,
i.e for the following matrix, the input data expected is 4139 as shown 4140 4141 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays 4142 4143 $ 1 0 0 4144 $ 2 0 3 P0 4145 $ ------- 4146 $ 4 5 6 P1 4147 $ 4148 $ Process0 [P0]: rows_owned=[0,1] 4149 $ i = {0,1,3} [size = nrow+1 = 2+1] 4150 $ j = {0,0,2} [size = 3] 4151 $ v = {1,2,3} [size = 3] 4152 $ 4153 $ Process1 [P1]: rows_owned=[2] 4154 $ i = {0,3} [size = nrow+1 = 1+1] 4155 $ j = {0,1,2} [size = 3] 4156 $ v = {4,5,6} [size = 3] 4157 4158 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4159 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4160 @*/ 4161 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4162 { 4163 PetscErrorCode ierr; 4164 4165 PetscFunctionBegin; 4166 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4167 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4168 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4169 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4170 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4171 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4172 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4173 PetscFunctionReturn(0); 4174 } 4175 4176 /*@ 4177 MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard 4178 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical 4179 4180 Collective 4181 4182 Input Parameters: 4183 + mat - the matrix 4184 . m - number of local rows (Cannot be PETSC_DECIDE) 4185 . n - This value should be the same as the local size used in creating the 4186 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4187 calculated if N is given) For square matrices n is almost always m. 4188 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4189 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4190 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4191 . 
J - column indices
-  v - matrix values

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
@*/
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscErrorCode ierr;
  PetscInt       cstart,nnz,i,j;
  PetscInt       *ld;
  PetscBool      nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data;
  PetscScalar    *ad,*ao;
  const PetscInt *Adi = Ad->i;
  PetscInt       ldi,Iii,md;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change between calls to MatUpdateMPIAIJWithArrays()");
  if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change between calls to MatUpdateMPIAIJWithArrays()");

  ierr   = MatSeqAIJGetArrayWrite(Aij->A,&ad);CHKERRQ(ierr);
  ierr   = MatSeqAIJGetArrayWrite(Aij->B,&ao);CHKERRQ(ierr);
  cstart = mat->cmap->rstart;
  if (!Aij->ld) {
    /* count the number of entries below the block diagonal in each local row */
    ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
    Aij->ld = ld;
    for (i=0; i<m; i++) {
      nnz = Ii[i+1] - Ii[i];
      j   = 0;
      while (j < nnz && J[j] < cstart) j++; /* test j < nnz before reading J[j] so we never index past the row */
      J    += nnz;
      ld[i] = j;
    }
  } else {
    ld = Aij->ld;
  }

  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    Iii = Ii[i];
    ldi = ld[i];
    md  = Adi[i+1] - Adi[i];
    ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
    ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
    ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
    ad += md;
    ao += nnz - md;
  }
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  ierr = MatSeqAIJRestoreArrayWrite(Aij->A,&ad);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayWrite(Aij->B,&ao);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(0);
}

/*@C
   MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
      This value should be the same as the local size used in creating the
      y vector for the matrix-vector product y = Ax.
.
n - This value should be the same as the local size used in creating the 4274 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4275 calculated if N is given) For square matrices n is almost always m. 4276 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4277 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4278 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4279 (same value is used for all local rows) 4280 . d_nnz - array containing the number of nonzeros in the various rows of the 4281 DIAGONAL portion of the local submatrix (possibly different for each row) 4282 or NULL, if d_nz is used to specify the nonzero structure. 4283 The size of this array is equal to the number of local rows, i.e 'm'. 4284 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4285 submatrix (same value is used for all local rows). 4286 - o_nnz - array containing the number of nonzeros in the various rows of the 4287 OFF-DIAGONAL portion of the local submatrix (possibly different for 4288 each row) or NULL, if o_nz is used to specify the nonzero 4289 structure. The size of this array is equal to the number 4290 of local rows, i.e 'm'. 4291 4292 Output Parameter: 4293 . A - the matrix 4294 4295 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4296 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4297 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4298 4299 Notes: 4300 If the *_nnz parameter is given then the *_nz parameter is ignored 4301 4302 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4303 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4304 storage requirements for this matrix. 4305 4306 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4307 processor than it must be used on all processors that share the object for 4308 that argument. 4309 4310 The user MUST specify either the local or global matrix dimensions 4311 (possibly both). 4312 4313 The parallel matrix is partitioned across processors such that the 4314 first m0 rows belong to process 0, the next m1 rows belong to 4315 process 1, the next m2 rows belong to process 2 etc.. where 4316 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4317 values corresponding to [m x N] submatrix. 4318 4319 The columns are logically partitioned with the n0 columns belonging 4320 to 0th partition, the next n1 columns belonging to the next 4321 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4322 4323 The DIAGONAL portion of the local submatrix on any given processor 4324 is the submatrix corresponding to the rows and columns m,n 4325 corresponding to the given processor. i.e diagonal matrix on 4326 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4327 etc. The remaining portion of the local submatrix [m x (N-n)] 4328 constitute the OFF-DIAGONAL portion. The example below better 4329 illustrates this concept. 4330 4331 For a square global matrix we define each processor's diagonal portion 4332 to be its local rows and the corresponding columns (a square submatrix); 4333 each processor's off-diagonal portion encompasses the remainder of the 4334 local matrix (a rectangular submatrix). 4335 4336 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 
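
   A minimal calling sketch (the local size of 100 and the preallocation
   counts used below are illustrative placeholders only, not values mandated
   by this routine):
.vb
     Mat A;
     MatCreateAIJ(PETSC_COMM_WORLD,100,100,PETSC_DETERMINE,PETSC_DETERMINE,5,NULL,2,NULL,&A);
     ... insert entries with MatSetValues() ...
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
     MatDestroy(&A);
.ve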
   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism
.vb
     MatCreate(...,&A);
     MatSetType(A,MATMPIAIJ);
     MatSetSizes(A, m,n,M,N);
     MatMPIAIJSetPreallocation(A,...);
.ve

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices.
        See viewer types in manual of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter to be viewed as a matrix.
        Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.
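
   For the 8x8 example above, the per-process calls using only these scalar
   counts could look like the following sketch (each rank passes its own
   d_nz and o_nz; the variable name A is illustrative):
.vb
     on proc0:  MatCreateAIJ(comm,3,3,8,8,2,NULL,2,NULL,&A);
     on proc1:  MatCreateAIJ(comm,3,3,8,8,3,NULL,2,NULL,&A);
     on proc2:  MatCreateAIJ(comm,2,2,8,8,1,NULL,4,NULL,&A);
.ve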

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@C
     MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix

   Not collective

   Input Parameter:
.  A - The MPIAIJ matrix

   Output Parameters:
+  Ad - The local diagonal block as a SeqAIJ matrix
.  Ao - The local off-diagonal block as a SeqAIJ matrix
-  colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix

   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
   in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
   local column numbers to global column numbers in the original matrix.
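
   A minimal usage sketch (the assembled MPIAIJ matrix A and the column index j are placeholders):
.vb
     Mat            Ad,Ao;
     const PetscInt *colmap;
     MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
     /* local column j of Ao corresponds to global column colmap[j] of A */
.ve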
4471 4472 Level: intermediate 4473 4474 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ 4475 @*/ 4476 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4477 { 4478 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4479 PetscBool flg; 4480 PetscErrorCode ierr; 4481 4482 PetscFunctionBegin; 4483 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4484 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4485 if (Ad) *Ad = a->A; 4486 if (Ao) *Ao = a->B; 4487 if (colmap) *colmap = a->garray; 4488 PetscFunctionReturn(0); 4489 } 4490 4491 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4492 { 4493 PetscErrorCode ierr; 4494 PetscInt m,N,i,rstart,nnz,Ii; 4495 PetscInt *indx; 4496 PetscScalar *values; 4497 MatType rootType; 4498 4499 PetscFunctionBegin; 4500 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4501 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4502 PetscInt *dnz,*onz,sum,bs,cbs; 4503 4504 if (n == PETSC_DECIDE) { 4505 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4506 } 4507 /* Check sum(n) = N */ 4508 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 4509 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4510 4511 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 4512 rstart -= m; 4513 4514 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4515 for (i=0; i<m; i++) { 4516 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4517 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4518 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4519 } 4520 4521 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4522 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4523 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4524 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4525 ierr = MatGetRootType_Private(inmat,&rootType);CHKERRQ(ierr); 4526 ierr = MatSetType(*outmat,rootType);CHKERRQ(ierr); 4527 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4528 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4529 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4530 ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 4531 } 4532 4533 /* numeric phase */ 4534 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4535 for (i=0; i<m; i++) { 4536 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4537 Ii = i + rstart; 4538 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4539 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4540 } 4541 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4542 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4543 PetscFunctionReturn(0); 4544 } 4545 4546 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4547 { 4548 PetscErrorCode ierr; 4549 PetscMPIInt rank; 4550 PetscInt m,N,i,rstart,nnz; 4551 size_t len; 4552 const PetscInt *indx; 4553 PetscViewer out; 4554 char *name; 4555 Mat B; 4556 const PetscScalar *values; 4557 4558 PetscFunctionBegin; 4559 ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr); 4560 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 4561 /* 
Should this be the type of the diagonal block of A? */ 4562 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4563 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4564 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4565 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4566 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4567 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 4568 for (i=0; i<m; i++) { 4569 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4570 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4571 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4572 } 4573 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4574 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4575 4576 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr); 4577 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4578 ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr); 4579 ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr); 4580 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4581 ierr = PetscFree(name);CHKERRQ(ierr); 4582 ierr = MatView(B,out);CHKERRQ(ierr); 4583 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4584 ierr = MatDestroy(&B);CHKERRQ(ierr); 4585 PetscFunctionReturn(0); 4586 } 4587 4588 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4589 { 4590 PetscErrorCode ierr; 4591 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4592 4593 PetscFunctionBegin; 4594 if (!merge) PetscFunctionReturn(0); 4595 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4596 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4597 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4598 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4599 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4600 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4601 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4602 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4603 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4604 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4605 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4606 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4607 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4608 ierr = PetscFree(merge);CHKERRQ(ierr); 4609 PetscFunctionReturn(0); 4610 } 4611 4612 #include <../src/mat/utils/freespace.h> 4613 #include <petscbt.h> 4614 4615 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4616 { 4617 PetscErrorCode ierr; 4618 MPI_Comm comm; 4619 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4620 PetscMPIInt size,rank,taga,*len_s; 4621 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4622 PetscInt proc,m; 4623 PetscInt **buf_ri,**buf_rj; 4624 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4625 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4626 MPI_Request *s_waits,*r_waits; 4627 MPI_Status *status; 4628 const MatScalar *aa,*a_a; 4629 MatScalar **abuf_r,*ba_i; 4630 Mat_Merge_SeqsToMPI *merge; 4631 PetscContainer container; 4632 4633 PetscFunctionBegin; 4634 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4635 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4636 4637 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4638 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 4639 4640 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4641 if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not 
created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4642 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4643 ierr = MatSeqAIJGetArrayRead(seqmat,&a_a);CHKERRQ(ierr); 4644 aa = a_a; 4645 4646 bi = merge->bi; 4647 bj = merge->bj; 4648 buf_ri = merge->buf_ri; 4649 buf_rj = merge->buf_rj; 4650 4651 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4652 owners = merge->rowmap->range; 4653 len_s = merge->len_s; 4654 4655 /* send and recv matrix values */ 4656 /*-----------------------------*/ 4657 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4658 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4659 4660 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4661 for (proc=0,k=0; proc<size; proc++) { 4662 if (!len_s[proc]) continue; 4663 i = owners[proc]; 4664 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRMPI(ierr); 4665 k++; 4666 } 4667 4668 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRMPI(ierr);} 4669 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRMPI(ierr);} 4670 ierr = PetscFree(status);CHKERRQ(ierr); 4671 4672 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4673 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4674 4675 /* insert mat values of mpimat */ 4676 /*----------------------------*/ 4677 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4678 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4679 4680 for (k=0; k<merge->nrecv; k++) { 4681 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4682 nrows = *(buf_ri_k[k]); 4683 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4684 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4685 } 4686 4687 /* set values of ba */ 4688 m = merge->rowmap->n; 4689 for (i=0; i<m; i++) { 4690 arow = owners[rank] + i; 4691 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4692 bnzi = bi[i+1] - bi[i]; 4693 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4694 4695 /* add local non-zero vals of this proc's seqmat into ba */ 4696 anzi = ai[arow+1] - ai[arow]; 4697 aj = a->j + ai[arow]; 4698 aa = a_a + ai[arow]; 4699 nextaj = 0; 4700 for (j=0; nextaj<anzi; j++) { 4701 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4702 ba_i[j] += aa[nextaj++]; 4703 } 4704 } 4705 4706 /* add received vals into ba */ 4707 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4708 /* i-th row */ 4709 if (i == *nextrow[k]) { 4710 anzi = *(nextai[k]+1) - *nextai[k]; 4711 aj = buf_rj[k] + *(nextai[k]); 4712 aa = abuf_r[k] + *(nextai[k]); 4713 nextaj = 0; 4714 for (j=0; nextaj<anzi; j++) { 4715 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4716 ba_i[j] += aa[nextaj++]; 4717 } 4718 } 4719 nextrow[k]++; nextai[k]++; 4720 } 4721 } 4722 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4723 } 4724 ierr = MatSeqAIJRestoreArrayRead(seqmat,&a_a);CHKERRQ(ierr); 4725 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4726 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4727 4728 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4729 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4730 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4731 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4732 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4733 PetscFunctionReturn(0); 4734 } 
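
/*
   MatCreateMPIAIJSumSeqAIJNumeric() above and MatCreateMPIAIJSumSeqAIJSymbolic() below implement
   the symbolic/numeric split used by MatCreateMPIAIJSumSeqAIJ(): the symbolic phase determines the
   parallel nonzero structure and attaches the supporting Mat_Merge_SeqsToMPI container to the new
   matrix, while the numeric phase communicates and sums the values and may be repeated for reuse.
*/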
4735 4736 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4737 { 4738 PetscErrorCode ierr; 4739 Mat B_mpi; 4740 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4741 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4742 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4743 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4744 PetscInt len,proc,*dnz,*onz,bs,cbs; 4745 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4746 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4747 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4748 MPI_Status *status; 4749 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4750 PetscBT lnkbt; 4751 Mat_Merge_SeqsToMPI *merge; 4752 PetscContainer container; 4753 4754 PetscFunctionBegin; 4755 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4756 4757 /* make sure it is a PETSc comm */ 4758 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4759 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4760 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 4761 4762 ierr = PetscNew(&merge);CHKERRQ(ierr); 4763 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4764 4765 /* determine row ownership */ 4766 /*---------------------------------------------------------*/ 4767 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4768 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4769 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4770 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4771 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4772 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4773 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4774 4775 m = merge->rowmap->n; 4776 owners = merge->rowmap->range; 4777 4778 /* determine the number of messages to send, their lengths */ 4779 /*---------------------------------------------------------*/ 4780 len_s = merge->len_s; 4781 4782 len = 0; /* length of buf_si[] */ 4783 merge->nsend = 0; 4784 for (proc=0; proc<size; proc++) { 4785 len_si[proc] = 0; 4786 if (proc == rank) { 4787 len_s[proc] = 0; 4788 } else { 4789 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4790 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4791 } 4792 if (len_s[proc]) { 4793 merge->nsend++; 4794 nrows = 0; 4795 for (i=owners[proc]; i<owners[proc+1]; i++) { 4796 if (ai[i+1] > ai[i]) nrows++; 4797 } 4798 len_si[proc] = 2*(nrows+1); 4799 len += len_si[proc]; 4800 } 4801 } 4802 4803 /* determine the number and length of messages to receive for ij-structure */ 4804 /*-------------------------------------------------------------------------*/ 4805 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4806 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4807 4808 /* post the Irecv of j-structure */ 4809 /*-------------------------------*/ 4810 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4811 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4812 4813 /* post the Isend of j-structure */ 4814 /*--------------------------------*/ 4815 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4816 4817 for (proc=0, k=0; proc<size; proc++) { 4818 if (!len_s[proc]) continue; 4819 i = owners[proc]; 4820 ierr = 
MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRMPI(ierr); 4821 k++; 4822 } 4823 4824 /* receives and sends of j-structure are complete */ 4825 /*------------------------------------------------*/ 4826 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRMPI(ierr);} 4827 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRMPI(ierr);} 4828 4829 /* send and recv i-structure */ 4830 /*---------------------------*/ 4831 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4832 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4833 4834 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4835 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4836 for (proc=0,k=0; proc<size; proc++) { 4837 if (!len_s[proc]) continue; 4838 /* form outgoing message for i-structure: 4839 buf_si[0]: nrows to be sent 4840 [1:nrows]: row index (global) 4841 [nrows+1:2*nrows+1]: i-structure index 4842 */ 4843 /*-------------------------------------------*/ 4844 nrows = len_si[proc]/2 - 1; 4845 buf_si_i = buf_si + nrows+1; 4846 buf_si[0] = nrows; 4847 buf_si_i[0] = 0; 4848 nrows = 0; 4849 for (i=owners[proc]; i<owners[proc+1]; i++) { 4850 anzi = ai[i+1] - ai[i]; 4851 if (anzi) { 4852 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4853 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4854 nrows++; 4855 } 4856 } 4857 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRMPI(ierr); 4858 k++; 4859 buf_si += len_si[proc]; 4860 } 4861 4862 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRMPI(ierr);} 4863 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRMPI(ierr);} 4864 4865 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4866 for (i=0; i<merge->nrecv; i++) { 4867 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4868 } 4869 4870 ierr = PetscFree(len_si);CHKERRQ(ierr); 4871 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4872 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4873 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4874 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4875 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4876 ierr = PetscFree(status);CHKERRQ(ierr); 4877 4878 /* compute a local seq matrix in each processor */ 4879 /*----------------------------------------------*/ 4880 /* allocate bi array and free space for accumulating nonzero column info */ 4881 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4882 bi[0] = 0; 4883 4884 /* create and initialize a linked list */ 4885 nlnk = N+1; 4886 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4887 4888 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4889 len = ai[owners[rank+1]] - ai[owners[rank]]; 4890 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4891 4892 current_space = free_space; 4893 4894 /* determine symbolic info for each local row */ 4895 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4896 4897 for (k=0; k<merge->nrecv; k++) { 4898 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4899 nrows = *buf_ri_k[k]; 4900 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4901 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4902 } 4903 4904 
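
  /* For each local row: merge the column indices of this process's seqmat with the received
     column data in a sorted linked list, record the merged row length for preallocation, and
     copy the merged indices into the free-space buffers that are later compacted into bj. */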
ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled = PETSC_FALSE;
  merge->bi        = bi;
  merge->bj        = bj;
  merge->buf_ri    = buf_ri;
  merge->buf_rj    = buf_rj;
  merge->coi       = NULL;
  merge->coj       = NULL;
  merge->owners_co = NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
      MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
                 matrices from each processor
4991 Collective 4992 4993 Input Parameters: 4994 + comm - the communicators the parallel matrix will live on 4995 . seqmat - the input sequential matrices 4996 . m - number of local rows (or PETSC_DECIDE) 4997 . n - number of local columns (or PETSC_DECIDE) 4998 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4999 5000 Output Parameter: 5001 . mpimat - the parallel matrix generated 5002 5003 Level: advanced 5004 5005 Notes: 5006 The dimensions of the sequential matrix in each processor MUST be the same. 5007 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5008 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 5009 @*/ 5010 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 5011 { 5012 PetscErrorCode ierr; 5013 PetscMPIInt size; 5014 5015 PetscFunctionBegin; 5016 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 5017 if (size == 1) { 5018 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5019 if (scall == MAT_INITIAL_MATRIX) { 5020 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 5021 } else { 5022 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5023 } 5024 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5025 PetscFunctionReturn(0); 5026 } 5027 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5028 if (scall == MAT_INITIAL_MATRIX) { 5029 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5030 } 5031 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5032 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5033 PetscFunctionReturn(0); 5034 } 5035 5036 /*@ 5037 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5038 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5039 with MatGetSize() 5040 5041 Not Collective 5042 5043 Input Parameters: 5044 + A - the matrix 5045 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5046 5047 Output Parameter: 5048 . A_loc - the local sequential matrix generated 5049 5050 Level: developer 5051 5052 Notes: 5053 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 5054 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 5055 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 5056 modify the values of the returned A_loc. 
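
   A minimal usage sketch (A is assumed to be an assembled MATMPIAIJ matrix):
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... change the values of A, then refresh the local copy ... */
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
     MatDestroy(&A_loc);
.ve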
5057 5058 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge() 5059 @*/ 5060 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5061 { 5062 PetscErrorCode ierr; 5063 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5064 Mat_SeqAIJ *mat,*a,*b; 5065 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5066 const PetscScalar *aa,*ba,*aav,*bav; 5067 PetscScalar *ca,*cam; 5068 PetscMPIInt size; 5069 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5070 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5071 PetscBool match; 5072 5073 PetscFunctionBegin; 5074 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5075 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5076 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 5077 if (size == 1) { 5078 if (scall == MAT_INITIAL_MATRIX) { 5079 ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr); 5080 *A_loc = mpimat->A; 5081 } else if (scall == MAT_REUSE_MATRIX) { 5082 ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5083 } 5084 PetscFunctionReturn(0); 5085 } 5086 5087 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5088 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5089 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5090 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5091 ierr = MatSeqAIJGetArrayRead(mpimat->A,&aav);CHKERRQ(ierr); 5092 ierr = MatSeqAIJGetArrayRead(mpimat->B,&bav);CHKERRQ(ierr); 5093 aa = aav; 5094 ba = bav; 5095 if (scall == MAT_INITIAL_MATRIX) { 5096 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5097 ci[0] = 0; 5098 for (i=0; i<am; i++) { 5099 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5100 } 5101 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5102 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5103 k = 0; 5104 for (i=0; i<am; i++) { 5105 ncols_o = bi[i+1] - bi[i]; 5106 ncols_d = ai[i+1] - ai[i]; 5107 /* off-diagonal portion of A */ 5108 for (jo=0; jo<ncols_o; jo++) { 5109 col = cmap[*bj]; 5110 if (col >= cstart) break; 5111 cj[k] = col; bj++; 5112 ca[k++] = *ba++; 5113 } 5114 /* diagonal portion of A */ 5115 for (j=0; j<ncols_d; j++) { 5116 cj[k] = cstart + *aj++; 5117 ca[k++] = *aa++; 5118 } 5119 /* off-diagonal portion of A */ 5120 for (j=jo; j<ncols_o; j++) { 5121 cj[k] = cmap[*bj++]; 5122 ca[k++] = *ba++; 5123 } 5124 } 5125 /* put together the new matrix */ 5126 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5127 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5128 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5129 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5130 mat->free_a = PETSC_TRUE; 5131 mat->free_ij = PETSC_TRUE; 5132 mat->nonew = 0; 5133 } else if (scall == MAT_REUSE_MATRIX) { 5134 mat =(Mat_SeqAIJ*)(*A_loc)->data; 5135 ci = mat->i; 5136 cj = mat->j; 5137 ierr = MatSeqAIJGetArrayWrite(*A_loc,&cam);CHKERRQ(ierr); 5138 for (i=0; i<am; i++) { 5139 /* off-diagonal portion of A */ 5140 ncols_o = bi[i+1] - bi[i]; 5141 for (jo=0; jo<ncols_o; jo++) { 5142 col = cmap[*bj]; 5143 if (col >= cstart) break; 5144 *cam++ = *ba++; bj++; 5145 } 5146 /* diagonal portion of A */ 5147 ncols_d = ai[i+1] - ai[i]; 5148 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5149 /* off-diagonal portion of A */ 5150 for (j=jo; j<ncols_o; j++) { 5151 *cam++ = *ba++; bj++; 5152 } 5153 } 5154 ierr = MatSeqAIJRestoreArrayWrite(*A_loc,&cam);CHKERRQ(ierr); 5155 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5156 ierr = MatSeqAIJRestoreArrayRead(mpimat->A,&aav);CHKERRQ(ierr); 5157 ierr = MatSeqAIJRestoreArrayRead(mpimat->B,&bav);CHKERRQ(ierr); 5158 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5159 PetscFunctionReturn(0); 5160 } 5161 5162 /*@ 5163 MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5164 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part 5165 5166 Not Collective 5167 5168 Input Parameters: 5169 + A - the matrix 5170 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5171 5172 Output Parameters: 5173 + glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5174 - A_loc - the local sequential matrix generated 5175 5176 Level: developer 5177 5178 Notes: 5179 This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering) 5180 5181 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed() 5182 5183 @*/ 5184 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc) 5185 { 5186 PetscErrorCode ierr; 5187 Mat Ao,Ad; 5188 const PetscInt *cmap; 5189 PetscMPIInt size; 5190 PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*); 5191 5192 PetscFunctionBegin; 5193 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap);CHKERRQ(ierr); 5194 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 5195 if (size == 1) { 5196 if (scall == MAT_INITIAL_MATRIX) { 5197 ierr = PetscObjectReference((PetscObject)Ad);CHKERRQ(ierr); 5198 *A_loc = Ad; 5199 } else if (scall == MAT_REUSE_MATRIX) { 5200 ierr = MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5201 } 5202 if (glob) { ierr = ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob);CHKERRQ(ierr); } 5203 PetscFunctionReturn(0); 5204 } 5205 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f);CHKERRQ(ierr); 5206 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5207 if (f) { 5208 ierr = (*f)(A,scall,glob,A_loc);CHKERRQ(ierr); 5209 } else { 5210 Mat_SeqAIJ *a = (Mat_SeqAIJ*)Ad->data; 5211 Mat_SeqAIJ *b = (Mat_SeqAIJ*)Ao->data; 5212 Mat_SeqAIJ *c; 5213 PetscInt *ai = a->i, *aj = a->j; 5214 PetscInt *bi = b->i, *bj = b->j; 5215 PetscInt *ci,*cj; 5216 const PetscScalar *aa,*ba; 5217 PetscScalar *ca; 5218 PetscInt i,j,am,dn,on; 5219 5220 ierr = 
MatGetLocalSize(Ad,&am,&dn);CHKERRQ(ierr);
    ierr = MatGetLocalSize(Ao,NULL,&on);CHKERRQ(ierr);
    ierr = MatSeqAIJGetArrayRead(Ad,&aa);CHKERRQ(ierr);
    ierr = MatSeqAIJGetArrayRead(Ao,&ba);CHKERRQ(ierr);
    if (scall == MAT_INITIAL_MATRIX) {
      PetscInt k;
      ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
      ierr = PetscMalloc1(ai[am]+bi[am],&cj);CHKERRQ(ierr);
      ierr = PetscMalloc1(ai[am]+bi[am],&ca);CHKERRQ(ierr);
      ci[0] = 0;
      for (i=0,k=0; i<am; i++) {
        const PetscInt ncols_o = bi[i+1] - bi[i];
        const PetscInt ncols_d = ai[i+1] - ai[i];
        ci[i+1] = ci[i] + ncols_o + ncols_d;
        /* diagonal portion of A */
        for (j=0; j<ncols_d; j++,k++) {
          cj[k] = *aj++;
          ca[k] = *aa++;
        }
        /* off-diagonal portion of A */
        for (j=0; j<ncols_o; j++,k++) {
          cj[k] = dn + *bj++;
          ca[k] = *ba++;
        }
      }
      /* put together the new matrix */
      ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc);CHKERRQ(ierr);
      /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
      /* Since these are PETSc arrays, change flags to free them as necessary. */
      c          = (Mat_SeqAIJ*)(*A_loc)->data;
      c->free_a  = PETSC_TRUE;
      c->free_ij = PETSC_TRUE;
      c->nonew   = 0;
      ierr = MatSetType(*A_loc,((PetscObject)Ad)->type_name);CHKERRQ(ierr);
    } else if (scall == MAT_REUSE_MATRIX) {
      ierr = MatSeqAIJGetArrayWrite(*A_loc,&ca);CHKERRQ(ierr);
      for (i=0; i<am; i++) {
        const PetscInt ncols_d = ai[i+1] - ai[i];
        const PetscInt ncols_o = bi[i+1] - bi[i];
        /* diagonal portion of A */
        for (j=0; j<ncols_d; j++) *ca++ = *aa++;
        /* off-diagonal portion of A */
        for (j=0; j<ncols_o; j++) *ca++ = *ba++;
      }
      ierr = MatSeqAIJRestoreArrayWrite(*A_loc,&ca);CHKERRQ(ierr);
    } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
    ierr = MatSeqAIJRestoreArrayRead(Ad,&aa);CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArrayRead(Ao,&ba);CHKERRQ(ierr);
    if (glob) {
      PetscInt cst, *gidx;

      ierr = MatGetOwnershipRangeColumn(A,&cst,NULL);CHKERRQ(ierr);
      ierr = PetscMalloc1(dn+on,&gidx);CHKERRQ(ierr);
      for (i=0; i<dn; i++) gidx[i]    = cst + i;
      for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
      ierr = ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob);CHKERRQ(ierr);
    }
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns

   Not Collective

   Input Parameters:
+  A - the matrix
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  row, col - index sets of rows and columns to extract (or NULL)

   Output Parameter:
.  
A_loc - the local sequential matrix generated 5294 5295 Level: developer 5296 5297 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5298 5299 @*/ 5300 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5301 { 5302 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5303 PetscErrorCode ierr; 5304 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5305 IS isrowa,iscola; 5306 Mat *aloc; 5307 PetscBool match; 5308 5309 PetscFunctionBegin; 5310 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5311 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5312 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5313 if (!row) { 5314 start = A->rmap->rstart; end = A->rmap->rend; 5315 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5316 } else { 5317 isrowa = *row; 5318 } 5319 if (!col) { 5320 start = A->cmap->rstart; 5321 cmap = a->garray; 5322 nzA = a->A->cmap->n; 5323 nzB = a->B->cmap->n; 5324 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5325 ncols = 0; 5326 for (i=0; i<nzB; i++) { 5327 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5328 else break; 5329 } 5330 imark = i; 5331 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5332 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5333 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5334 } else { 5335 iscola = *col; 5336 } 5337 if (scall != MAT_INITIAL_MATRIX) { 5338 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5339 aloc[0] = *A_loc; 5340 } 5341 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5342 if (!col) { /* attach global id of condensed columns */ 5343 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5344 } 5345 *A_loc = aloc[0]; 5346 ierr = PetscFree(aloc);CHKERRQ(ierr); 5347 if (!row) { 5348 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5349 } 5350 if (!col) { 5351 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5352 } 5353 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5354 PetscFunctionReturn(0); 5355 } 5356 5357 /* 5358 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5359 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5360 * on a global size. 
5361 * */ 5362 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5363 { 5364 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5365 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5366 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5367 PetscMPIInt owner; 5368 PetscSFNode *iremote,*oiremote; 5369 const PetscInt *lrowindices; 5370 PetscErrorCode ierr; 5371 PetscSF sf,osf; 5372 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5373 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5374 MPI_Comm comm; 5375 ISLocalToGlobalMapping mapping; 5376 const PetscScalar *pd_a,*po_a; 5377 5378 PetscFunctionBegin; 5379 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr); 5380 /* plocalsize is the number of roots 5381 * nrows is the number of leaves 5382 * */ 5383 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr); 5384 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr); 5385 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr); 5386 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr); 5387 for (i=0;i<nrows;i++) { 5388 /* Find a remote index and an owner for a row 5389 * The row could be local or remote 5390 * */ 5391 owner = 0; 5392 lidx = 0; 5393 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr); 5394 iremote[i].index = lidx; 5395 iremote[i].rank = owner; 5396 } 5397 /* Create SF to communicate how many nonzero columns for each row */ 5398 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5399 /* SF will figure out the number of nonzero colunms for each row, and their 5400 * offsets 5401 * */ 5402 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5403 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5404 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5405 5406 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); 5407 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); 5408 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); 5409 roffsets[0] = 0; 5410 roffsets[1] = 0; 5411 for (i=0;i<plocalsize;i++) { 5412 /* diag */ 5413 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5414 /* off diag */ 5415 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5416 /* compute offsets so that we relative location for each row */ 5417 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5418 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5419 } 5420 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr); 5421 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr); 5422 /* 'r' means root, and 'l' means leaf */ 5423 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr); 5424 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr); 5425 ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr); 5426 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr); 5427 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5428 ierr = PetscFree(roffsets);CHKERRQ(ierr); 5429 ierr = PetscFree(nrcols);CHKERRQ(ierr); 5430 dntotalcols = 0; 5431 ontotalcols = 0; 5432 ncol = 0; 5433 for (i=0;i<nrows;i++) { 5434 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5435 ncol = PetscMax(pnnz[i],ncol); 5436 /* diag */ 5437 dntotalcols += nlcols[i*2+0]; 5438 /* off diag */ 5439 ontotalcols += nlcols[i*2+1]; 5440 } 5441 /* We do not need to figure the right number of columns 5442 * since all the calculations will be done by going through the raw data 5443 * */ 5444 ierr = 
MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr); 5445 ierr = MatSetUp(*P_oth);CHKERRQ(ierr); 5446 ierr = PetscFree(pnnz);CHKERRQ(ierr); 5447 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5448 /* diag */ 5449 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr); 5450 /* off diag */ 5451 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr); 5452 /* diag */ 5453 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr); 5454 /* off diag */ 5455 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr); 5456 dntotalcols = 0; 5457 ontotalcols = 0; 5458 ntotalcols = 0; 5459 for (i=0;i<nrows;i++) { 5460 owner = 0; 5461 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr); 5462 /* Set iremote for diag matrix */ 5463 for (j=0;j<nlcols[i*2+0];j++) { 5464 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5465 iremote[dntotalcols].rank = owner; 5466 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5467 ilocal[dntotalcols++] = ntotalcols++; 5468 } 5469 /* off diag */ 5470 for (j=0;j<nlcols[i*2+1];j++) { 5471 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5472 oiremote[ontotalcols].rank = owner; 5473 oilocal[ontotalcols++] = ntotalcols++; 5474 } 5475 } 5476 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr); 5477 ierr = PetscFree(loffsets);CHKERRQ(ierr); 5478 ierr = PetscFree(nlcols);CHKERRQ(ierr); 5479 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5480 /* P serves as roots and P_oth is leaves 5481 * Diag matrix 5482 * */ 5483 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5484 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5485 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5486 5487 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr); 5488 /* Off diag */ 5489 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5490 ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr); 5491 ierr = PetscSFSetUp(osf);CHKERRQ(ierr); 5492 ierr = MatSeqAIJGetArrayRead(p->A,&pd_a);CHKERRQ(ierr); 5493 ierr = MatSeqAIJGetArrayRead(p->B,&po_a);CHKERRQ(ierr); 5494 /* We operate on the matrix internal data for saving memory */ 5495 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5496 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5497 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr); 5498 /* Convert to global indices for diag matrix */ 5499 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5500 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5501 /* We want P_oth store global indices */ 5502 ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr); 5503 /* Use memory scalable approach */ 5504 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr); 5505 ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); 5506 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5507 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5508 /* Convert back to local indices */ 5509 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5510 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5511 nout = 0; 5512 ierr = 
ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr); 5513 if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout); 5514 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 5515 /* Exchange values */ 5516 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5517 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5518 ierr = MatSeqAIJRestoreArrayRead(p->A,&pd_a);CHKERRQ(ierr); 5519 ierr = MatSeqAIJRestoreArrayRead(p->B,&po_a);CHKERRQ(ierr); 5520 /* Stop PETSc from shrinking memory */ 5521 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5522 ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5523 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5524 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5525 ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5526 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5527 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5528 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5529 PetscFunctionReturn(0); 5530 } 5531 5532 /* 5533 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5534 * This supports MPIAIJ and MAIJ 5535 * */ 5536 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5537 { 5538 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5539 Mat_SeqAIJ *p_oth; 5540 IS rows,map; 5541 PetscHMapI hamp; 5542 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5543 MPI_Comm comm; 5544 PetscSF sf,osf; 5545 PetscBool has; 5546 PetscErrorCode ierr; 5547 5548 PetscFunctionBegin; 5549 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5550 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5551 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5552 * and then create a submatrix (that often is an overlapping matrix) 5553 * */ 5554 if (reuse == MAT_INITIAL_MATRIX) { 5555 /* Use a hash table to figure out unique keys */ 5556 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5557 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5558 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5559 count = 0; 5560 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5561 for (i=0;i<a->B->cmap->n;i++) { 5562 key = a->garray[i]/dof; 5563 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5564 if (!has) { 5565 mapping[i] = count; 5566 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5567 } else { 5568 /* Current 'i' has the same value the previous step */ 5569 mapping[i] = count-1; 5570 } 5571 } 5572 ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5573 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5574 if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count); 5575 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5576 off = 0; 5577 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5578 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5579 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5580 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5581 /* In case, the matrix was already created but users want to recreate 
the matrix */
    ierr = MatDestroy(P_oth);CHKERRQ(ierr);
    ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
    ierr = ISDestroy(&map);CHKERRQ(ierr);
    ierr = ISDestroy(&rows);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If the matrix was already created, we simply update the values using the SF objects
     * that were attached to the matrix earlier.
     */
    const PetscScalar *pd_a,*po_a;

    ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
    ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
    if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
    /* Update values in place */
    ierr = MatSeqAIJGetArrayRead(p->A,&pd_a);CHKERRQ(ierr);
    ierr = MatSeqAIJGetArrayRead(p->B,&po_a);CHKERRQ(ierr);
    ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
    ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArrayRead(p->A,&pd_a);CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArrayRead(p->B,&po_a);CHKERRQ(ierr);
  } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that correspond to the nonzero columns of local A

    Collective on Mat

   Input Parameters:
+  A - the first matrix in mpiaij format
.  B - the second matrix in mpiaij format
-  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Input/Output Parameters:
+  rowb - index sets of rows of B to extract (or NULL), modified on output
-  colb - index sets of columns of B to extract (or NULL), modified on output

   Output Parameter:
.  
B_seq - the sequential matrix generated 5627 5628 Level: developer 5629 5630 @*/ 5631 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5632 { 5633 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5634 PetscErrorCode ierr; 5635 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5636 IS isrowb,iscolb; 5637 Mat *bseq=NULL; 5638 5639 PetscFunctionBegin; 5640 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5641 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5642 } 5643 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5644 5645 if (scall == MAT_INITIAL_MATRIX) { 5646 start = A->cmap->rstart; 5647 cmap = a->garray; 5648 nzA = a->A->cmap->n; 5649 nzB = a->B->cmap->n; 5650 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5651 ncols = 0; 5652 for (i=0; i<nzB; i++) { /* row < local row index */ 5653 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5654 else break; 5655 } 5656 imark = i; 5657 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5658 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5659 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5660 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5661 } else { 5662 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5663 isrowb = *rowb; iscolb = *colb; 5664 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5665 bseq[0] = *B_seq; 5666 } 5667 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5668 *B_seq = bseq[0]; 5669 ierr = PetscFree(bseq);CHKERRQ(ierr); 5670 if (!rowb) { 5671 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5672 } else { 5673 *rowb = isrowb; 5674 } 5675 if (!colb) { 5676 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5677 } else { 5678 *colb = iscolb; 5679 } 5680 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5681 PetscFunctionReturn(0); 5682 } 5683 5684 /* 5685 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5686 of the OFF-DIAGONAL portion of local A 5687 5688 Collective on Mat 5689 5690 Input Parameters: 5691 + A,B - the matrices in mpiaij format 5692 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5693 5694 Output Parameter: 5695 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5696 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5697 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5698 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5699 5700 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5701 for this matrix. This is not desirable.. 
5702 5703 Level: developer 5704 5705 */ 5706 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5707 { 5708 PetscErrorCode ierr; 5709 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5710 Mat_SeqAIJ *b_oth; 5711 VecScatter ctx; 5712 MPI_Comm comm; 5713 const PetscMPIInt *rprocs,*sprocs; 5714 const PetscInt *srow,*rstarts,*sstarts; 5715 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5716 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5717 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5718 MPI_Request *reqs = NULL,*rwaits = NULL,*swaits = NULL; 5719 PetscMPIInt size,tag,rank,nreqs; 5720 5721 PetscFunctionBegin; 5722 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5723 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 5724 5725 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5726 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5727 } 5728 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5729 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 5730 5731 if (size == 1) { 5732 startsj_s = NULL; 5733 bufa_ptr = NULL; 5734 *B_oth = NULL; 5735 PetscFunctionReturn(0); 5736 } 5737 5738 ctx = a->Mvctx; 5739 tag = ((PetscObject)ctx)->tag; 5740 5741 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5742 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5743 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5744 ierr = PetscMPIIntCast(nsends+nrecvs,&nreqs);CHKERRQ(ierr); 5745 ierr = PetscMalloc1(nreqs,&reqs);CHKERRQ(ierr); 5746 rwaits = reqs; 5747 swaits = reqs + nrecvs; 5748 5749 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5750 if (scall == MAT_INITIAL_MATRIX) { 5751 /* i-array */ 5752 /*---------*/ 5753 /* post receives */ 5754 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5755 for (i=0; i<nrecvs; i++) { 5756 rowlen = rvalues + rstarts[i]*rbs; 5757 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5758 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5759 } 5760 5761 /* pack the outgoing message */ 5762 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5763 5764 sstartsj[0] = 0; 5765 rstartsj[0] = 0; 5766 len = 0; /* total length of j or a array to be sent */ 5767 if (nsends) { 5768 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5769 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5770 } 5771 for (i=0; i<nsends; i++) { 5772 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5773 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5774 for (j=0; j<nrows; j++) { 5775 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5776 for (l=0; l<sbs; l++) { 5777 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5778 5779 rowlen[j*sbs+l] = ncols; 5780 5781 len += ncols; 5782 ierr = 
MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5783 } 5784 k++; 5785 } 5786 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5787 5788 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5789 } 5790 /* recvs and sends of i-array are completed */ 5791 if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5792 ierr = PetscFree(svalues);CHKERRQ(ierr); 5793 5794 /* allocate buffers for sending j and a arrays */ 5795 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5796 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5797 5798 /* create i-array of B_oth */ 5799 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5800 5801 b_othi[0] = 0; 5802 len = 0; /* total length of j or a array to be received */ 5803 k = 0; 5804 for (i=0; i<nrecvs; i++) { 5805 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5806 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5807 for (j=0; j<nrows; j++) { 5808 b_othi[k+1] = b_othi[k] + rowlen[j]; 5809 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5810 k++; 5811 } 5812 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5813 } 5814 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5815 5816 /* allocate space for j and a arrrays of B_oth */ 5817 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5818 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5819 5820 /* j-array */ 5821 /*---------*/ 5822 /* post receives of j-array */ 5823 for (i=0; i<nrecvs; i++) { 5824 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5825 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5826 } 5827 5828 /* pack the outgoing message j-array */ 5829 if (nsends) k = sstarts[0]; 5830 for (i=0; i<nsends; i++) { 5831 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5832 bufJ = bufj+sstartsj[i]; 5833 for (j=0; j<nrows; j++) { 5834 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5835 for (ll=0; ll<sbs; ll++) { 5836 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5837 for (l=0; l<ncols; l++) { 5838 *bufJ++ = cols[l]; 5839 } 5840 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5841 } 5842 } 5843 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5844 } 5845 5846 /* recvs and sends of j-array are completed */ 5847 if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5848 } else if (scall == MAT_REUSE_MATRIX) { 5849 sstartsj = *startsj_s; 5850 rstartsj = *startsj_r; 5851 bufa = *bufa_ptr; 5852 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5853 ierr = MatSeqAIJGetArrayWrite(*B_oth,&b_otha);CHKERRQ(ierr); 5854 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5855 5856 /* a-array */ 5857 /*---------*/ 5858 /* post receives of a-array */ 5859 for (i=0; i<nrecvs; i++) { 5860 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5861 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5862 } 5863 5864 /* pack the outgoing message a-array */ 5865 if (nsends) k = sstarts[0]; 5866 for (i=0; i<nsends; i++) { 5867 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5868 bufA = bufa+sstartsj[i]; 5869 for (j=0; j<nrows; j++) { 5870 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5871 for (ll=0; 
ll<sbs; ll++) { 5872 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5873 for (l=0; l<ncols; l++) { 5874 *bufA++ = vals[l]; 5875 } 5876 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5877 } 5878 } 5879 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5880 } 5881 /* recvs and sends of a-array are completed */ 5882 if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5883 ierr = PetscFree(reqs);CHKERRQ(ierr); 5884 5885 if (scall == MAT_INITIAL_MATRIX) { 5886 /* put together the new matrix */ 5887 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5888 5889 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5890 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5891 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5892 b_oth->free_a = PETSC_TRUE; 5893 b_oth->free_ij = PETSC_TRUE; 5894 b_oth->nonew = 0; 5895 5896 ierr = PetscFree(bufj);CHKERRQ(ierr); 5897 if (!startsj_s || !bufa_ptr) { 5898 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5899 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5900 } else { 5901 *startsj_s = sstartsj; 5902 *startsj_r = rstartsj; 5903 *bufa_ptr = bufa; 5904 } 5905 } else if (scall == MAT_REUSE_MATRIX) { 5906 ierr = MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha);CHKERRQ(ierr); 5907 } 5908 5909 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5910 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5911 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5912 PetscFunctionReturn(0); 5913 } 5914 5915 /*@C 5916 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5917 5918 Not Collective 5919 5920 Input Parameter: 5921 . A - The matrix in mpiaij format 5922 5923 Output Parameters: 5924 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5925 . 
colmap - A map from global column index to local index into lvec 5926 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5927 5928 Level: developer 5929 5930 @*/ 5931 #if defined(PETSC_USE_CTABLE) 5932 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5933 #else 5934 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5935 #endif 5936 { 5937 Mat_MPIAIJ *a; 5938 5939 PetscFunctionBegin; 5940 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5941 PetscValidPointer(lvec, 2); 5942 PetscValidPointer(colmap, 3); 5943 PetscValidPointer(multScatter, 4); 5944 a = (Mat_MPIAIJ*) A->data; 5945 if (lvec) *lvec = a->lvec; 5946 if (colmap) *colmap = a->colmap; 5947 if (multScatter) *multScatter = a->Mvctx; 5948 PetscFunctionReturn(0); 5949 } 5950 5951 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5952 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5953 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5954 #if defined(PETSC_HAVE_MKL_SPARSE) 5955 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5956 #endif 5957 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5958 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5959 #if defined(PETSC_HAVE_ELEMENTAL) 5960 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5961 #endif 5962 #if defined(PETSC_HAVE_SCALAPACK) 5963 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 5964 #endif 5965 #if defined(PETSC_HAVE_HYPRE) 5966 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5967 #endif 5968 #if defined(PETSC_HAVE_CUDA) 5969 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 5970 #endif 5971 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 5972 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*); 5973 #endif 5974 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5975 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5976 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 5977 5978 /* 5979 Computes (B'*A')' since computing B*A directly is untenable 5980 5981 n p p 5982 [ ] [ ] [ ] 5983 m [ A ] * n [ B ] = m [ C ] 5984 [ ] [ ] [ ] 5985 5986 */ 5987 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5988 { 5989 PetscErrorCode ierr; 5990 Mat At,Bt,Ct; 5991 5992 PetscFunctionBegin; 5993 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5994 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5995 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr); 5996 ierr = MatDestroy(&At);CHKERRQ(ierr); 5997 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5998 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5999 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 6000 PetscFunctionReturn(0); 6001 } 6002 6003 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 6004 { 6005 PetscErrorCode ierr; 6006 PetscBool cisdense; 6007 6008 PetscFunctionBegin; 6009 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 6010 ierr = 
MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 6011 ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 6012 ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr); 6013 if (!cisdense) { 6014 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 6015 } 6016 ierr = MatSetUp(C);CHKERRQ(ierr); 6017 6018 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6019 PetscFunctionReturn(0); 6020 } 6021 6022 /* ----------------------------------------------------------------*/ 6023 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6024 { 6025 Mat_Product *product = C->product; 6026 Mat A = product->A,B=product->B; 6027 6028 PetscFunctionBegin; 6029 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 6030 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 6031 6032 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6033 C->ops->productsymbolic = MatProductSymbolic_AB; 6034 PetscFunctionReturn(0); 6035 } 6036 6037 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6038 { 6039 PetscErrorCode ierr; 6040 Mat_Product *product = C->product; 6041 6042 PetscFunctionBegin; 6043 if (product->type == MATPRODUCT_AB) { 6044 ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr); 6045 } 6046 PetscFunctionReturn(0); 6047 } 6048 /* ----------------------------------------------------------------*/ 6049 6050 /*MC 6051 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6052 6053 Options Database Keys: 6054 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 6055 6056 Level: beginner 6057 6058 Notes: 6059 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values, 6060 in this case the values associated with the rows and columns one passes in are set to zero 6061 in the matrix 6062 6063 MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type.
In this no 6064 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 6065 6066 .seealso: MatCreateAIJ() 6067 M*/ 6068 6069 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6070 { 6071 Mat_MPIAIJ *b; 6072 PetscErrorCode ierr; 6073 PetscMPIInt size; 6074 6075 PetscFunctionBegin; 6076 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 6077 6078 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 6079 B->data = (void*)b; 6080 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 6081 B->assembled = PETSC_FALSE; 6082 B->insertmode = NOT_SET_VALUES; 6083 b->size = size; 6084 6085 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr); 6086 6087 /* build cache for off array entries formed */ 6088 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 6089 6090 b->donotstash = PETSC_FALSE; 6091 b->colmap = NULL; 6092 b->garray = NULL; 6093 b->roworiented = PETSC_TRUE; 6094 6095 /* stuff used for matrix vector multiply */ 6096 b->lvec = NULL; 6097 b->Mvctx = NULL; 6098 6099 /* stuff for MatGetRow() */ 6100 b->rowindices = NULL; 6101 b->rowvalues = NULL; 6102 b->getrowactive = PETSC_FALSE; 6103 6104 /* flexible pointer used in CUSPARSE classes */ 6105 b->spptr = NULL; 6106 6107 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 6108 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 6109 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 6110 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 6111 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 6112 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 6113 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 6114 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 6115 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 6116 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr); 6117 #if defined(PETSC_HAVE_CUDA) 6118 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr); 6119 #endif 6120 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6121 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr); 6122 #endif 6123 #if defined(PETSC_HAVE_MKL_SPARSE) 6124 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 6125 #endif 6126 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 6127 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr); 6128 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 6129 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);CHKERRQ(ierr); 6130 #if defined(PETSC_HAVE_ELEMENTAL) 6131 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 6132 #endif 6133 #if defined(PETSC_HAVE_SCALAPACK) 6134 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr); 6135 #endif 6136 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 6137 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 6138 #if defined(PETSC_HAVE_HYPRE) 6139 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 6140 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr); 6141 #endif 6142 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr); 6143 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr); 6144 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 6145 PetscFunctionReturn(0); 6146 } 6147 6148 /*@C 6149 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 6150 and "off-diagonal" part of the matrix in CSR format. 6151 6152 Collective 6153 6154 Input Parameters: 6155 + comm - MPI communicator 6156 . m - number of local rows (Cannot be PETSC_DECIDE) 6157 . n - This value should be the same as the local size used in creating the 6158 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 6159 calculated if N is given) For square matrices n is almost always m. 6160 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 6161 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 6162 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6163 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6164 . a - matrix values 6165 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6166 . oj - column indices, which must be global, representing global columns in the MPIAIJ matrix 6167 - oa - matrix values 6168 6169 Output Parameter: 6170 . mat - the matrix 6171 6172 Level: advanced 6173 6174 Notes: 6175 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6176 must free the arrays once the matrix has been destroyed and not before. 6177 6178 The i and j indices are 0 based 6179 6180 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6181 6182 This sets local rows and cannot be used to set off-processor values. 6183 6184 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6185 legacy application natively assembles into exactly this split format. 
The code to do so is nontrivial and does 6186 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6187 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6188 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6189 communication if it is known that only local entries will be set. 6190 6191 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 6192 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 6193 @*/ 6194 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 6195 { 6196 PetscErrorCode ierr; 6197 Mat_MPIAIJ *maij; 6198 6199 PetscFunctionBegin; 6200 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6201 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 6202 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 6203 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 6204 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 6205 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 6206 maij = (Mat_MPIAIJ*) (*mat)->data; 6207 6208 (*mat)->preallocated = PETSC_TRUE; 6209 6210 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 6211 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 6212 6213 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 6214 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 6215 6216 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 6217 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6218 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6219 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 6220 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6221 PetscFunctionReturn(0); 6222 } 6223 6224 /* 6225 Special version for direct calls from Fortran 6226 */ 6227 #include <petsc/private/fortranimpl.h> 6228 6229 /* Change these macros so can be used in void function */ 6230 #undef CHKERRQ 6231 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 6232 #undef SETERRQ2 6233 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 6234 #undef SETERRQ3 6235 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 6236 #undef SETERRQ 6237 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 6238 6239 #if defined(PETSC_HAVE_FORTRAN_CAPS) 6240 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 6241 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 6242 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 6243 #else 6244 #endif 6245 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 6246 { 6247 Mat mat = *mmat; 6248 PetscInt m = *mm, n = *mn; 6249 InsertMode addv = *maddv; 6250 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 6251 PetscScalar value; 6252 PetscErrorCode ierr; 6253 6254 MatCheckPreallocated(mat,1); 6255 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 6256 else if (mat->insertmode != addv) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 6257 { 6258 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 6259 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 6260 PetscBool roworiented = aij->roworiented; 6261 6262 /* Some Variables required in the macro */ 6263 Mat A = aij->A; 6264 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 6265 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 6266 MatScalar *aa; 6267 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 6268 Mat B = aij->B; 6269 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 6270 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 6271 MatScalar *ba; 6272 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 6273 * cannot use "#if defined" inside a macro. */ 6274 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 6275 6276 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 6277 PetscInt nonew = a->nonew; 6278 MatScalar *ap1,*ap2; 6279 6280 PetscFunctionBegin; 6281 ierr = MatSeqAIJGetArray(A,&aa);CHKERRQ(ierr); 6282 ierr = MatSeqAIJGetArray(B,&ba);CHKERRQ(ierr); 6283 for (i=0; i<m; i++) { 6284 if (im[i] < 0) continue; 6285 if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 6286 if (im[i] >= rstart && im[i] < rend) { 6287 row = im[i] - rstart; 6288 lastcol1 = -1; 6289 rp1 = aj + ai[row]; 6290 ap1 = aa + ai[row]; 6291 rmax1 = aimax[row]; 6292 nrow1 = ailen[row]; 6293 low1 = 0; 6294 high1 = nrow1; 6295 lastcol2 = -1; 6296 rp2 = bj + bi[row]; 6297 ap2 = ba + bi[row]; 6298 rmax2 = bimax[row]; 6299 nrow2 = bilen[row]; 6300 low2 = 0; 6301 high2 = nrow2; 6302 6303 for (j=0; j<n; j++) { 6304 if (roworiented) value = v[i*n+j]; 6305 else value = v[i+j*m]; 6306 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 6307 if (in[j] >= cstart && in[j] < cend) { 6308 col = in[j] - cstart; 6309 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 6310 } else if (in[j] < 0) continue; 6311 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 6312 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */ 6313 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 6314 } else { 6315 if (mat->was_assembled) { 6316 if (!aij->colmap) { 6317 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 6318 } 6319 #if defined(PETSC_USE_CTABLE) 6320 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 6321 col--; 6322 #else 6323 col = aij->colmap[in[j]] - 1; 6324 #endif 6325 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 6326 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 6327 col = in[j]; 6328 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 6329 B = aij->B; 6330 b = (Mat_SeqAIJ*)B->data; 6331 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 6332 rp2 = bj + bi[row]; 6333 ap2 = ba + bi[row]; 6334 rmax2 = bimax[row]; 6335 nrow2 = bilen[row]; 6336 low2 = 0; 6337 high2 = nrow2; 6338 bm = aij->B->rmap->n; 6339 ba = b->a; 6340 inserted = PETSC_FALSE; 6341 } 6342 } else col = in[j]; 6343 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 6344 } 6345 } 6346 } else if 
(!aij->donotstash) { 6347 if (roworiented) { 6348 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6349 } else { 6350 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6351 } 6352 } 6353 } 6354 ierr = MatSeqAIJRestoreArray(A,&aa);CHKERRQ(ierr); 6355 ierr = MatSeqAIJRestoreArray(B,&ba);CHKERRQ(ierr); 6356 } 6357 PetscFunctionReturnVoid(); 6358 } 6359 6360 typedef struct { 6361 Mat *mp; /* intermediate products */ 6362 PetscBool *mptmp; /* is the intermediate product temporary ? */ 6363 PetscInt cp; /* number of intermediate products */ 6364 6365 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6366 PetscInt *startsj_s,*startsj_r; 6367 PetscScalar *bufa; 6368 Mat P_oth; 6369 6370 /* may take advantage of merging product->B */ 6371 Mat Bloc; /* B-local by merging diag and off-diag */ 6372 6373 /* cusparse does not have support to split between symbolic and numeric phases. 6374 When api_user is true, we don't need to update the numerical values 6375 of the temporary storage */ 6376 PetscBool reusesym; 6377 6378 /* support for COO values insertion */ 6379 PetscScalar *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6380 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6381 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6382 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */ 6383 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6384 PetscMemType mtype; 6385 6386 /* customization */ 6387 PetscBool abmerge; 6388 PetscBool P_oth_bind; 6389 } MatMatMPIAIJBACKEND; 6390 6391 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6392 { 6393 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data; 6394 PetscInt i; 6395 PetscErrorCode ierr; 6396 6397 PetscFunctionBegin; 6398 ierr = PetscFree2(mmdata->startsj_s,mmdata->startsj_r);CHKERRQ(ierr); 6399 ierr = PetscFree(mmdata->bufa);CHKERRQ(ierr); 6400 ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v);CHKERRQ(ierr); 6401 ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w);CHKERRQ(ierr); 6402 ierr = MatDestroy(&mmdata->P_oth);CHKERRQ(ierr); 6403 ierr = MatDestroy(&mmdata->Bloc);CHKERRQ(ierr); 6404 ierr = PetscSFDestroy(&mmdata->sf);CHKERRQ(ierr); 6405 for (i = 0; i < mmdata->cp; i++) { 6406 ierr = MatDestroy(&mmdata->mp[i]);CHKERRQ(ierr); 6407 } 6408 ierr = PetscFree2(mmdata->mp,mmdata->mptmp);CHKERRQ(ierr); 6409 ierr = PetscFree(mmdata->own[0]);CHKERRQ(ierr); 6410 ierr = PetscFree(mmdata->own);CHKERRQ(ierr); 6411 ierr = PetscFree(mmdata->off[0]);CHKERRQ(ierr); 6412 ierr = PetscFree(mmdata->off);CHKERRQ(ierr); 6413 ierr = PetscFree(mmdata);CHKERRQ(ierr); 6414 PetscFunctionReturn(0); 6415 } 6416 6417 /* Copy selected n entries with indices in idx[] of A to v[]. 
6418 If idx is NULL, copy the whole data array of A to v[] 6419 */ 6420 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 6421 { 6422 PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]); 6423 PetscErrorCode ierr; 6424 6425 PetscFunctionBegin; 6426 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f);CHKERRQ(ierr); 6427 if (f) { 6428 ierr = (*f)(A,n,idx,v);CHKERRQ(ierr); 6429 } else { 6430 const PetscScalar *vv; 6431 6432 ierr = MatSeqAIJGetArrayRead(A,&vv);CHKERRQ(ierr); 6433 if (n && idx) { 6434 PetscScalar *w = v; 6435 const PetscInt *oi = idx; 6436 PetscInt j; 6437 6438 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 6439 } else { 6440 ierr = PetscArraycpy(v,vv,n);CHKERRQ(ierr); 6441 } 6442 ierr = MatSeqAIJRestoreArrayRead(A,&vv);CHKERRQ(ierr); 6443 } 6444 PetscFunctionReturn(0); 6445 } 6446 6447 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 6448 { 6449 MatMatMPIAIJBACKEND *mmdata; 6450 PetscInt i,n_d,n_o; 6451 PetscErrorCode ierr; 6452 6453 PetscFunctionBegin; 6454 MatCheckProduct(C,1); 6455 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 6456 mmdata = (MatMatMPIAIJBACKEND*)C->product->data; 6457 if (!mmdata->reusesym) { /* update temporary matrices */ 6458 if (mmdata->P_oth) { 6459 ierr = MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 6460 } 6461 if (mmdata->Bloc) { 6462 ierr = MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc);CHKERRQ(ierr); 6463 } 6464 } 6465 mmdata->reusesym = PETSC_FALSE; 6466 6467 for (i = 0; i < mmdata->cp; i++) { 6468 if (!mmdata->mp[i]->ops->productnumeric) SETERRQ1(PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]); 6469 ierr = (*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]);CHKERRQ(ierr); 6470 } 6471 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 6472 PetscInt noff = mmdata->off[i+1] - mmdata->off[i]; 6473 6474 if (mmdata->mptmp[i]) continue; 6475 if (noff) { 6476 PetscInt nown = mmdata->own[i+1] - mmdata->own[i]; 6477 6478 ierr = MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o);CHKERRQ(ierr); 6479 ierr = MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d);CHKERRQ(ierr); 6480 n_o += noff; 6481 n_d += nown; 6482 } else { 6483 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data; 6484 6485 ierr = MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d);CHKERRQ(ierr); 6486 n_d += mm->nz; 6487 } 6488 } 6489 if (mmdata->hasoffproc) { /* offprocess insertion */ 6490 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr); 6491 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr); 6492 } 6493 ierr = MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES);CHKERRQ(ierr); 6494 PetscFunctionReturn(0); 6495 } 6496 6497 /* Support for Pt * A, A * P, or Pt * A * P */ 6498 #define MAX_NUMBER_INTERMEDIATE 4 6499 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 6500 { 6501 Mat_Product *product = C->product; 6502 Mat A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 6503 Mat_MPIAIJ *a,*p; 6504 MatMatMPIAIJBACKEND *mmdata; 6505 ISLocalToGlobalMapping P_oth_l2g = NULL; 6506 IS glob = NULL; 6507 const char *prefix; 6508 char pprefix[256]; 
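  /* Overview of the symbolic phase below (descriptive note): up to MAX_NUMBER_INTERMEDIATE sequential
     products mp[] are created from the diagonal/off-diagonal blocks of A and P (plus the gathered P_oth rows);
     rmapt[]/cmapt[] and rmapa[]/cmapa[] record how the rows/columns of each intermediate product map to the
     global rows/columns of C; the resulting (i,j) coordinates are assembled (scattering off-process entries
     with a PetscSF when needed) and handed to MatSetPreallocationCOO(), so that the numeric phase only has to
     fill coo_v and call MatSetValuesCOO(). */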
6509 const PetscInt *globidx,*P_oth_idx; 6510 PetscInt i,j,cp,m,n,M,N,ncoo,ncoo_d,ncoo_o,ncoo_oown,*coo_i,*coo_j; 6511 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */ 6512 /* type-0: consecutive, start from 0; type-1: consecutive with */ 6513 /* a base offset; type-2: sparse with a local to global map table */ 6514 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 6515 6516 MatProductType ptype; 6517 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk; 6518 PetscMPIInt size; 6519 PetscErrorCode ierr; 6520 6521 PetscFunctionBegin; 6522 MatCheckProduct(C,1); 6523 if (product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 6524 ptype = product->type; 6525 if (product->A->symmetric && ptype == MATPRODUCT_AtB) { 6526 ptype = MATPRODUCT_AB; 6527 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 6528 } 6529 switch (ptype) { 6530 case MATPRODUCT_AB: 6531 A = product->A; 6532 P = product->B; 6533 m = A->rmap->n; 6534 n = P->cmap->n; 6535 M = A->rmap->N; 6536 N = P->cmap->N; 6537 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 6538 break; 6539 case MATPRODUCT_AtB: 6540 P = product->A; 6541 A = product->B; 6542 m = P->cmap->n; 6543 n = A->cmap->n; 6544 M = P->cmap->N; 6545 N = A->cmap->N; 6546 hasoffproc = PETSC_TRUE; 6547 break; 6548 case MATPRODUCT_PtAP: 6549 A = product->A; 6550 P = product->B; 6551 m = P->cmap->n; 6552 n = P->cmap->n; 6553 M = P->cmap->N; 6554 N = P->cmap->N; 6555 hasoffproc = PETSC_TRUE; 6556 break; 6557 default: 6558 SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 6559 } 6560 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)C),&size);CHKERRMPI(ierr); 6561 if (size == 1) hasoffproc = PETSC_FALSE; 6562 6563 /* defaults */ 6564 for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) { 6565 mp[i] = NULL; 6566 mptmp[i] = PETSC_FALSE; 6567 rmapt[i] = -1; 6568 cmapt[i] = -1; 6569 rmapa[i] = NULL; 6570 cmapa[i] = NULL; 6571 } 6572 6573 /* customization */ 6574 ierr = PetscNew(&mmdata);CHKERRQ(ierr); 6575 mmdata->reusesym = product->api_user; 6576 if (ptype == MATPRODUCT_AB) { 6577 if (product->api_user) { 6578 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr); 6579 ierr = PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr); 6580 ierr = PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6581 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6582 } else { 6583 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr); 6584 ierr = PetscOptionsBool("-matproduct_ab_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr); 6585 ierr = PetscOptionsBool("-matproduct_ab_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6586 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6587 } 6588 } else if (ptype == MATPRODUCT_PtAP) { 6589 if (product->api_user) { 6590 ierr = 
PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr); 6591 ierr = PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6592 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6593 } else { 6594 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr); 6595 ierr = PetscOptionsBool("-matproduct_ptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6596 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6597 } 6598 } 6599 a = (Mat_MPIAIJ*)A->data; 6600 p = (Mat_MPIAIJ*)P->data; 6601 ierr = MatSetSizes(C,m,n,M,N);CHKERRQ(ierr); 6602 ierr = PetscLayoutSetUp(C->rmap);CHKERRQ(ierr); 6603 ierr = PetscLayoutSetUp(C->cmap);CHKERRQ(ierr); 6604 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 6605 ierr = MatGetOptionsPrefix(C,&prefix);CHKERRQ(ierr); 6606 6607 cp = 0; 6608 switch (ptype) { 6609 case MATPRODUCT_AB: /* A * P */ 6610 ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 6611 6612 /* A_diag * P_local (merged or not) */ 6613 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 6614 /* P is product->B */ 6615 ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 6616 ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6617 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6618 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6619 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6620 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6621 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6622 mp[cp]->product->api_user = product->api_user; 6623 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6624 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6625 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6626 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6627 rmapt[cp] = 1; 6628 cmapt[cp] = 2; 6629 cmapa[cp] = globidx; 6630 mptmp[cp] = PETSC_FALSE; 6631 cp++; 6632 } else { /* A_diag * P_diag and A_diag * P_off */ 6633 ierr = MatProductCreate(a->A,p->A,NULL,&mp[cp]);CHKERRQ(ierr); 6634 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6635 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6636 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6637 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6638 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6639 mp[cp]->product->api_user = product->api_user; 6640 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6641 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6642 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6643 rmapt[cp] = 1; 6644 cmapt[cp] = 1; 6645 mptmp[cp] = PETSC_FALSE; 6646 cp++; 6647 ierr = MatProductCreate(a->A,p->B,NULL,&mp[cp]);CHKERRQ(ierr); 6648 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6649 ierr = 
MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6650 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6651 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6652 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6653 mp[cp]->product->api_user = product->api_user; 6654 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6655 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6656 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6657 rmapt[cp] = 1; 6658 cmapt[cp] = 2; 6659 cmapa[cp] = p->garray; 6660 mptmp[cp] = PETSC_FALSE; 6661 cp++; 6662 } 6663 6664 /* A_off * P_other */ 6665 if (mmdata->P_oth) { 6666 ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); /* make P_oth use local col ids */ 6667 ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 6668 ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr); 6669 ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr); 6670 ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr); 6671 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6672 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6673 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6674 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6675 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6676 mp[cp]->product->api_user = product->api_user; 6677 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6678 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6679 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6680 rmapt[cp] = 1; 6681 cmapt[cp] = 2; 6682 cmapa[cp] = P_oth_idx; 6683 mptmp[cp] = PETSC_FALSE; 6684 cp++; 6685 } 6686 break; 6687 6688 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 6689 /* A is product->B */ 6690 ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 6691 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 6692 ierr = MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6693 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6694 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6695 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6696 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6697 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6698 mp[cp]->product->api_user = product->api_user; 6699 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6700 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6701 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6702 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6703 rmapt[cp] = 2; 6704 rmapa[cp] = globidx; 6705 cmapt[cp] = 2; 6706 cmapa[cp] = globidx; 6707 mptmp[cp] = PETSC_FALSE; 6708 cp++; 6709 } else { 6710 ierr = MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6711 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6712 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6713 
ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6714 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6715 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6716 mp[cp]->product->api_user = product->api_user; 6717 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6718 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6719 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6720 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6721 rmapt[cp] = 1; 6722 cmapt[cp] = 2; 6723 cmapa[cp] = globidx; 6724 mptmp[cp] = PETSC_FALSE; 6725 cp++; 6726 ierr = MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6727 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6728 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6729 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6730 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6731 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6732 mp[cp]->product->api_user = product->api_user; 6733 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6734 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6735 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6736 rmapt[cp] = 2; 6737 rmapa[cp] = p->garray; 6738 cmapt[cp] = 2; 6739 cmapa[cp] = globidx; 6740 mptmp[cp] = PETSC_FALSE; 6741 cp++; 6742 } 6743 break; 6744 case MATPRODUCT_PtAP: 6745 ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 6746 /* P is product->B */ 6747 ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 6748 ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6749 ierr = MatProductSetType(mp[cp],MATPRODUCT_PtAP);CHKERRQ(ierr); 6750 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6751 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6752 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6753 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6754 mp[cp]->product->api_user = product->api_user; 6755 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6756 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6757 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6758 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6759 rmapt[cp] = 2; 6760 rmapa[cp] = globidx; 6761 cmapt[cp] = 2; 6762 cmapa[cp] = globidx; 6763 mptmp[cp] = PETSC_FALSE; 6764 cp++; 6765 if (mmdata->P_oth) { 6766 ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); 6767 ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 6768 ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr); 6769 ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr); 6770 ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr); 6771 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6772 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6773 ierr = 
PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6774 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6775 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6776 mp[cp]->product->api_user = product->api_user; 6777 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6778 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6779 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6780 mptmp[cp] = PETSC_TRUE; 6781 cp++; 6782 ierr = MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]);CHKERRQ(ierr); 6783 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6784 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6785 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6786 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6787 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6788 mp[cp]->product->api_user = product->api_user; 6789 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6790 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6791 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6792 rmapt[cp] = 2; 6793 rmapa[cp] = globidx; 6794 cmapt[cp] = 2; 6795 cmapa[cp] = P_oth_idx; 6796 mptmp[cp] = PETSC_FALSE; 6797 cp++; 6798 } 6799 break; 6800 default: 6801 SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 6802 } 6803 /* sanity check */ 6804 if (size > 1) for (i = 0; i < cp; i++) if (rmapt[i] == 2 && !hasoffproc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %D",i); 6805 6806 ierr = PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp);CHKERRQ(ierr); 6807 for (i = 0; i < cp; i++) { 6808 mmdata->mp[i] = mp[i]; 6809 mmdata->mptmp[i] = mptmp[i]; 6810 } 6811 mmdata->cp = cp; 6812 C->product->data = mmdata; 6813 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 6814 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 6815 6816 /* memory type */ 6817 mmdata->mtype = PETSC_MEMTYPE_HOST; 6818 ierr = PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");CHKERRQ(ierr); 6819 ierr = PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"");CHKERRQ(ierr); 6820 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 6821 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 6822 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_DEVICE; 6823 #endif 6824 6825 /* prepare coo coordinates for values insertion */ 6826 6827 /* count total nonzeros of those intermediate seqaij Mats 6828 ncoo_d: # of nonzeros of matrices that do not have offproc entries 6829 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 6830 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 6831 */ 6832 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 6833 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 6834 if (mptmp[cp]) continue; 6835 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 6836 const PetscInt *rmap = rmapa[cp]; 6837 const PetscInt mr = mp[cp]->rmap->n; 6838 const PetscInt rs = C->rmap->rstart; 6839 const PetscInt re = C->rmap->rend; 6840 const 
PetscInt *ii = mm->i; 6841 for (i = 0; i < mr; i++) { 6842 const PetscInt gr = rmap[i]; 6843 const PetscInt nz = ii[i+1] - ii[i]; 6844 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 6845 else ncoo_oown += nz; /* this row is local */ 6846 } 6847 } else ncoo_d += mm->nz; 6848 } 6849 6850 /* 6851 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 6852 6853 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 6854 6855 off[0] points to a big index array, which is shared by off[1,2,...]. Similarily, for own[0]. 6856 6857 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 6858 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 6859 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 6860 6861 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 6862 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaing part stores i of nonzeros I will receive. 6863 */ 6864 ierr = PetscCalloc1(mmdata->cp+1,&mmdata->off);CHKERRQ(ierr); /* +1 to make a csr-like data structure */ 6865 ierr = PetscCalloc1(mmdata->cp+1,&mmdata->own);CHKERRQ(ierr); 6866 6867 /* gather (i,j) of nonzeros inserted by remote procs */ 6868 if (hasoffproc) { 6869 PetscSF msf; 6870 PetscInt ncoo2,*coo_i2,*coo_j2; 6871 6872 ierr = PetscMalloc1(ncoo_o,&mmdata->off[0]);CHKERRQ(ierr); 6873 ierr = PetscMalloc1(ncoo_oown,&mmdata->own[0]);CHKERRQ(ierr); 6874 ierr = PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j);CHKERRQ(ierr); /* to collect (i,j) of entries to be sent to others */ 6875 6876 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 6877 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 6878 PetscInt *idxoff = mmdata->off[cp]; 6879 PetscInt *idxown = mmdata->own[cp]; 6880 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 6881 const PetscInt *rmap = rmapa[cp]; 6882 const PetscInt *cmap = cmapa[cp]; 6883 const PetscInt *ii = mm->i; 6884 PetscInt *coi = coo_i + ncoo_o; 6885 PetscInt *coj = coo_j + ncoo_o; 6886 const PetscInt mr = mp[cp]->rmap->n; 6887 const PetscInt rs = C->rmap->rstart; 6888 const PetscInt re = C->rmap->rend; 6889 const PetscInt cs = C->cmap->rstart; 6890 for (i = 0; i < mr; i++) { 6891 const PetscInt *jj = mm->j + ii[i]; 6892 const PetscInt gr = rmap[i]; 6893 const PetscInt nz = ii[i+1] - ii[i]; 6894 if (gr < rs || gr >= re) { /* this is an offproc row */ 6895 for (j = ii[i]; j < ii[i+1]; j++) { 6896 *coi++ = gr; 6897 *idxoff++ = j; 6898 } 6899 if (!cmapt[cp]) { /* already global */ 6900 for (j = 0; j < nz; j++) *coj++ = jj[j]; 6901 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 6902 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 6903 } else { /* offdiag */ 6904 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 6905 } 6906 ncoo_o += nz; 6907 } else { /* this is a local row */ 6908 for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j; 6909 } 6910 } 6911 } 6912 mmdata->off[cp + 1] = idxoff; 6913 mmdata->own[cp + 1] = idxown; 6914 } 6915 6916 ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr); 6917 ierr = PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i);CHKERRQ(ierr); 6918 ierr = PetscSFGetMultiSF(mmdata->sf,&msf);CHKERRQ(ierr); 6919 ierr = PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL);CHKERRQ(ierr); 6920 ncoo = 
ncoo_d + ncoo_oown + ncoo2; 6921 ierr = PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2);CHKERRQ(ierr); 6922 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); /* put (i,j) of remote nonzeros at back */ 6923 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 6924 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 6925 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 6926 ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr); 6927 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 6928 ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w);CHKERRQ(ierr); 6929 coo_i = coo_i2; 6930 coo_j = coo_j2; 6931 } else { /* no offproc values insertion */ 6932 ncoo = ncoo_d; 6933 ierr = PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j);CHKERRQ(ierr); 6934 6935 ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr); 6936 ierr = PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER);CHKERRQ(ierr); 6937 ierr = PetscSFSetUp(mmdata->sf);CHKERRQ(ierr); 6938 } 6939 mmdata->hasoffproc = hasoffproc; 6940 6941 /* gather (i,j) of nonzeros inserted locally */ 6942 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 6943 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 6944 PetscInt *coi = coo_i + ncoo_d; 6945 PetscInt *coj = coo_j + ncoo_d; 6946 const PetscInt *jj = mm->j; 6947 const PetscInt *ii = mm->i; 6948 const PetscInt *cmap = cmapa[cp]; 6949 const PetscInt *rmap = rmapa[cp]; 6950 const PetscInt mr = mp[cp]->rmap->n; 6951 const PetscInt rs = C->rmap->rstart; 6952 const PetscInt re = C->rmap->rend; 6953 const PetscInt cs = C->cmap->rstart; 6954 6955 if (mptmp[cp]) continue; 6956 if (rmapt[cp] == 1) { /* consecutive rows */ 6957 /* fill coo_i */ 6958 for (i = 0; i < mr; i++) { 6959 const PetscInt gr = i + rs; 6960 for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr; 6961 } 6962 /* fill coo_j */ 6963 if (!cmapt[cp]) { /* type-0, already global */ 6964 ierr = PetscArraycpy(coj,jj,mm->nz);CHKERRQ(ierr); 6965 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 6966 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 6967 } else { /* type-2, local to global for sparse columns */ 6968 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 6969 } 6970 ncoo_d += mm->nz; 6971 } else if (rmapt[cp] == 2) { /* sparse rows */ 6972 for (i = 0; i < mr; i++) { 6973 const PetscInt *jj = mm->j + ii[i]; 6974 const PetscInt gr = rmap[i]; 6975 const PetscInt nz = ii[i+1] - ii[i]; 6976 if (gr >= rs && gr < re) { /* local rows */ 6977 for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr; 6978 if (!cmapt[cp]) { /* type-0, already global */ 6979 for (j = 0; j < nz; j++) *coj++ = jj[j]; 6980 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 6981 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 6982 } else { /* type-2, local to global for sparse columns */ 6983 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 6984 } 6985 ncoo_d += nz; 6986 } 6987 } 6988 } 6989 } 6990 if (glob) { 6991 ierr = ISRestoreIndices(glob,&globidx);CHKERRQ(ierr); 6992 } 6993 ierr = ISDestroy(&glob);CHKERRQ(ierr); 6994 if (P_oth_l2g) { 6995 ierr = ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 6996 } 6997 ierr = ISLocalToGlobalMappingDestroy(&P_oth_l2g);CHKERRQ(ierr); 6998 /* allocate an array to store all nonzeros 
(inserted locally or remotely) belonging to this proc */ 6999 ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v);CHKERRQ(ierr); 7000 7001 /* preallocate with COO data */ 7002 ierr = MatSetPreallocationCOO(C,ncoo,coo_i,coo_j);CHKERRQ(ierr); 7003 ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr); 7004 PetscFunctionReturn(0); 7005 } 7006 7007 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7008 { 7009 Mat_Product *product = mat->product; 7010 PetscErrorCode ierr; 7011 #if defined(PETSC_HAVE_DEVICE) 7012 PetscBool match = PETSC_FALSE; 7013 PetscBool usecpu = PETSC_FALSE; 7014 #else 7015 PetscBool match = PETSC_TRUE; 7016 #endif 7017 7018 PetscFunctionBegin; 7019 MatCheckProduct(mat,1); 7020 #if defined(PETSC_HAVE_DEVICE) 7021 if (!product->A->boundtocpu && !product->B->boundtocpu) { 7022 ierr = PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match);CHKERRQ(ierr); 7023 } 7024 if (match) { /* we can always fallback to the CPU if requested */ 7025 switch (product->type) { 7026 case MATPRODUCT_AB: 7027 if (product->api_user) { 7028 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");CHKERRQ(ierr); 7029 ierr = PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7030 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7031 } else { 7032 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr); 7033 ierr = PetscOptionsBool("-matproduct_ab_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7034 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7035 } 7036 break; 7037 case MATPRODUCT_AtB: 7038 if (product->api_user) { 7039 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr); 7040 ierr = PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7041 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7042 } else { 7043 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr); 7044 ierr = PetscOptionsBool("-matproduct_atb_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7045 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7046 } 7047 break; 7048 case MATPRODUCT_PtAP: 7049 if (product->api_user) { 7050 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");CHKERRQ(ierr); 7051 ierr = PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7052 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7053 } else { 7054 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr); 7055 ierr = PetscOptionsBool("-matproduct_ptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7056 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7057 } 7058 break; 7059 default: 7060 break; 7061 } 7062 match = (PetscBool)!usecpu; 7063 } 7064 #endif 7065 if (match) { 7066 switch (product->type) { 7067 case MATPRODUCT_AB: 7068 case MATPRODUCT_AtB: 7069 case MATPRODUCT_PtAP: 7070 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7071 break; 7072 default: 7073 break; 7074 } 7075 } 7076 /* fallback to MPIAIJ ops */ 7077 if (!mat->ops->productsymbolic) { 7078 ierr = 
MatProductSetFromOptions_MPIAIJ(mat);CHKERRQ(ierr); 7079 } 7080 PetscFunctionReturn(0); 7081 } 7082
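/*
   Usage sketch (illustrative only, not compiled here; the matrices A and P are hypothetical, already
   assembled MPIAIJ-based matrices): a PtAP computed through the MatProduct API, which may route through
   MatProductSymbolic_MPIAIJBACKEND()/MatProductNumeric_MPIAIJBACKEND() above when a device backend is in use,
   looks like

      Mat C;
      ierr = MatProductCreate(A,P,NULL,&C);CHKERRQ(ierr);
      ierr = MatProductSetType(C,MATPRODUCT_PtAP);CHKERRQ(ierr);
      ierr = MatProductSetFromOptions(C);CHKERRQ(ierr);
      ierr = MatProductSymbolic(C);CHKERRQ(ierr);
      ierr = MatProductNumeric(C);CHKERRQ(ierr);

   The CPU fallback and the P_oth/merge customizations registered in this file can be requested at run time
   with, e.g., -matptap_backend_cpu, -matmatmult_backend_cpu, -matmatmult_backend_mergeB and
   -matmatmult_backend_pothbind.
*/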