#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes:
   Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type
   also automatically switches over to use inodes when enough exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) {
    ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
  }
  if (a->B) {
    ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb,*aav,*bav;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  ierr = MatSeqAIJGetArrayRead(mat->A,&aav);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(mat->B,&bav);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = bav + ib[i];
    for (j=0;
j <nb; j++) { 110 if (bb[j] != 0.0) goto ok1; 111 } 112 cnt++; 113 ok1:; 114 } 115 ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 116 if (!n0rows) { 117 ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr); 118 ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr); 119 PetscFunctionReturn(0); 120 } 121 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 122 cnt = 0; 123 for (i=0; i<m; i++) { 124 na = ia[i+1] - ia[i]; 125 nb = ib[i+1] - ib[i]; 126 if (!na && !nb) continue; 127 aa = aav + ia[i]; 128 for (j=0; j<na;j++) { 129 if (aa[j] != 0.0) { 130 rows[cnt++] = rstart + i; 131 goto ok2; 132 } 133 } 134 bb = bav + ib[i]; 135 for (j=0; j<nb; j++) { 136 if (bb[j] != 0.0) { 137 rows[cnt++] = rstart + i; 138 goto ok2; 139 } 140 } 141 ok2:; 142 } 143 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 144 ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr); 145 ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr); 146 PetscFunctionReturn(0); 147 } 148 149 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 150 { 151 PetscErrorCode ierr; 152 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 153 PetscBool cong; 154 155 PetscFunctionBegin; 156 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 157 if (Y->assembled && cong) { 158 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 159 } else { 160 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 161 } 162 PetscFunctionReturn(0); 163 } 164 165 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 166 { 167 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 168 PetscErrorCode ierr; 169 PetscInt i,rstart,nrows,*rows; 170 171 PetscFunctionBegin; 172 *zrows = NULL; 173 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 174 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 175 for (i=0; i<nrows; i++) rows[i] += rstart; 176 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 177 PetscFunctionReturn(0); 178 } 179 180 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 181 { 182 PetscErrorCode ierr; 183 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 184 PetscInt i,n,*garray = aij->garray; 185 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 186 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 187 PetscReal *work; 188 const PetscScalar *dummy; 189 190 PetscFunctionBegin; 191 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 192 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 193 ierr = MatSeqAIJGetArrayRead(aij->A,&dummy);CHKERRQ(ierr); 194 ierr = MatSeqAIJRestoreArrayRead(aij->A,&dummy);CHKERRQ(ierr); 195 ierr = MatSeqAIJGetArrayRead(aij->B,&dummy);CHKERRQ(ierr); 196 ierr = MatSeqAIJRestoreArrayRead(aij->B,&dummy);CHKERRQ(ierr); 197 if (type == NORM_2) { 198 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 199 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 200 } 201 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 202 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 203 } 204 } else if (type == NORM_1) { 205 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 206 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 207 } 208 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 209 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 210 } 211 } else if (type == NORM_INFINITY) { 212 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 213 work[A->cmap->rstart + a_aij->j[i]] 
= PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 214 } 215 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 216 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 217 } 218 219 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 220 if (type == NORM_INFINITY) { 221 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 222 } else { 223 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 224 } 225 ierr = PetscFree(work);CHKERRQ(ierr); 226 if (type == NORM_2) { 227 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 228 } 229 PetscFunctionReturn(0); 230 } 231 232 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 233 { 234 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 235 IS sis,gis; 236 PetscErrorCode ierr; 237 const PetscInt *isis,*igis; 238 PetscInt n,*iis,nsis,ngis,rstart,i; 239 240 PetscFunctionBegin; 241 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 242 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 243 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 244 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 245 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 246 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 247 248 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 249 ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr); 250 ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr); 251 n = ngis + nsis; 252 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 253 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 254 for (i=0; i<n; i++) iis[i] += rstart; 255 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 256 257 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 258 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 259 ierr = ISDestroy(&sis);CHKERRQ(ierr); 260 ierr = ISDestroy(&gis);CHKERRQ(ierr); 261 PetscFunctionReturn(0); 262 } 263 264 /* 265 Local utility routine that creates a mapping from the global column 266 number to the local number in the off-diagonal part of the local 267 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 268 a slightly higher hash table cost; without it it is not scalable (each processor 269 has an order N integer array but is fast to access. 
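
     As an illustration (a sketch of the lookup that MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ()
     below perform; gcol and lcol are placeholder names, not variables in this file), a global column
     index gcol is translated to a local column lcol of the off-diagonal block via

#if defined(PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
       lcol--;
#else
       lcol = aij->colmap[gcol] - 1;
#endif

     where a negative lcol indicates that gcol does not occur in the off-diagonal part on this process.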
270 */ 271 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 272 { 273 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 274 PetscErrorCode ierr; 275 PetscInt n = aij->B->cmap->n,i; 276 277 PetscFunctionBegin; 278 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 279 #if defined(PETSC_USE_CTABLE) 280 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 281 for (i=0; i<n; i++) { 282 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 283 } 284 #else 285 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 286 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 287 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 288 #endif 289 PetscFunctionReturn(0); 290 } 291 292 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 293 { \ 294 if (col <= lastcol1) low1 = 0; \ 295 else high1 = nrow1; \ 296 lastcol1 = col;\ 297 while (high1-low1 > 5) { \ 298 t = (low1+high1)/2; \ 299 if (rp1[t] > col) high1 = t; \ 300 else low1 = t; \ 301 } \ 302 for (_i=low1; _i<high1; _i++) { \ 303 if (rp1[_i] > col) break; \ 304 if (rp1[_i] == col) { \ 305 if (addv == ADD_VALUES) { \ 306 ap1[_i] += value; \ 307 /* Not sure LogFlops will slow dow the code or not */ \ 308 (void)PetscLogFlops(1.0); \ 309 } \ 310 else ap1[_i] = value; \ 311 inserted = PETSC_TRUE; \ 312 goto a_noinsert; \ 313 } \ 314 } \ 315 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 316 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 317 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 318 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 319 N = nrow1++ - 1; a->nz++; high1++; \ 320 /* shift up all the later entries in this row */ \ 321 ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\ 322 ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\ 323 rp1[_i] = col; \ 324 ap1[_i] = value; \ 325 A->nonzerostate++;\ 326 a_noinsert: ; \ 327 ailen[row] = nrow1; \ 328 } 329 330 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 331 { \ 332 if (col <= lastcol2) low2 = 0; \ 333 else high2 = nrow2; \ 334 lastcol2 = col; \ 335 while (high2-low2 > 5) { \ 336 t = (low2+high2)/2; \ 337 if (rp2[t] > col) high2 = t; \ 338 else low2 = t; \ 339 } \ 340 for (_i=low2; _i<high2; _i++) { \ 341 if (rp2[_i] > col) break; \ 342 if (rp2[_i] == col) { \ 343 if (addv == ADD_VALUES) { \ 344 ap2[_i] += value; \ 345 (void)PetscLogFlops(1.0); \ 346 } \ 347 else ap2[_i] = value; \ 348 inserted = PETSC_TRUE; \ 349 goto b_noinsert; \ 350 } \ 351 } \ 352 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 353 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 354 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 355 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 356 N = nrow2++ - 1; b->nz++; high2++; \ 357 /* shift up all the later entries in this row */ \ 358 ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\ 359 ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\ 360 rp2[_i] = col; \ 361 ap2[_i] = value; \ 362 B->nonzerostate++; \ 363 b_noinsert: ; \ 364 
bilen[row] = nrow2; \ 365 } 366 367 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 368 { 369 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 370 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 371 PetscErrorCode ierr; 372 PetscInt l,*garray = mat->garray,diag; 373 374 PetscFunctionBegin; 375 /* code only works for square matrices A */ 376 377 /* find size of row to the left of the diagonal part */ 378 ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr); 379 row = row - diag; 380 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 381 if (garray[b->j[b->i[row]+l]] > diag) break; 382 } 383 ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr); 384 385 /* diagonal part */ 386 ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr); 387 388 /* right of diagonal part */ 389 ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr); 390 #if defined(PETSC_HAVE_DEVICE) 391 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU; 392 #endif 393 PetscFunctionReturn(0); 394 } 395 396 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 397 { 398 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 399 PetscScalar value = 0.0; 400 PetscErrorCode ierr; 401 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 402 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 403 PetscBool roworiented = aij->roworiented; 404 405 /* Some Variables required in the macro */ 406 Mat A = aij->A; 407 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 408 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 409 PetscBool ignorezeroentries = a->ignorezeroentries; 410 Mat B = aij->B; 411 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 412 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 413 MatScalar *aa,*ba; 414 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 415 * cannot use "#if defined" inside a macro. 
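 * The macros set inserted = PETSC_TRUE when an existing entry is updated, and the flag is consulted
 * only inside the #if defined(PETSC_HAVE_DEVICE) blocks that follow each macro invocation below,
 * where it marks the offload mask of the corresponding block as PETSC_OFFLOAD_CPU.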
*/ 416 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 417 418 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 419 PetscInt nonew; 420 MatScalar *ap1,*ap2; 421 422 PetscFunctionBegin; 423 #if defined(PETSC_HAVE_DEVICE) 424 if (A->offloadmask == PETSC_OFFLOAD_GPU) { 425 const PetscScalar *dummy; 426 ierr = MatSeqAIJGetArrayRead(A,&dummy);CHKERRQ(ierr); 427 ierr = MatSeqAIJRestoreArrayRead(A,&dummy);CHKERRQ(ierr); 428 } 429 if (B->offloadmask == PETSC_OFFLOAD_GPU) { 430 const PetscScalar *dummy; 431 ierr = MatSeqAIJGetArrayRead(B,&dummy);CHKERRQ(ierr); 432 ierr = MatSeqAIJRestoreArrayRead(B,&dummy);CHKERRQ(ierr); 433 } 434 #endif 435 aa = a->a; 436 ba = b->a; 437 for (i=0; i<m; i++) { 438 if (im[i] < 0) continue; 439 if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 440 if (im[i] >= rstart && im[i] < rend) { 441 row = im[i] - rstart; 442 lastcol1 = -1; 443 rp1 = aj + ai[row]; 444 ap1 = aa + ai[row]; 445 rmax1 = aimax[row]; 446 nrow1 = ailen[row]; 447 low1 = 0; 448 high1 = nrow1; 449 lastcol2 = -1; 450 rp2 = bj + bi[row]; 451 ap2 = ba + bi[row]; 452 rmax2 = bimax[row]; 453 nrow2 = bilen[row]; 454 low2 = 0; 455 high2 = nrow2; 456 457 for (j=0; j<n; j++) { 458 if (v) value = roworiented ? v[i*n+j] : v[i+j*m]; 459 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 460 if (in[j] >= cstart && in[j] < cend) { 461 col = in[j] - cstart; 462 nonew = a->nonew; 463 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 464 #if defined(PETSC_HAVE_DEVICE) 465 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 466 #endif 467 } else if (in[j] < 0) continue; 468 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 469 else { 470 if (mat->was_assembled) { 471 if (!aij->colmap) { 472 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 473 } 474 #if defined(PETSC_USE_CTABLE) 475 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 476 col--; 477 #else 478 col = aij->colmap[in[j]] - 1; 479 #endif 480 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 481 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 482 col = in[j]; 483 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 484 B = aij->B; 485 b = (Mat_SeqAIJ*)B->data; 486 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 487 rp2 = bj + bi[row]; 488 ap2 = ba + bi[row]; 489 rmax2 = bimax[row]; 490 nrow2 = bilen[row]; 491 low2 = 0; 492 high2 = nrow2; 493 bm = aij->B->rmap->n; 494 ba = b->a; 495 inserted = PETSC_FALSE; 496 } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 497 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 498 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 499 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 500 } 501 } else col = in[j]; 502 nonew = b->nonew; 503 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 504 #if defined(PETSC_HAVE_DEVICE) 505 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 506 #endif 507 } 508 } 509 } else { 510 if (mat->nooffprocentries) 
SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 511 if (!aij->donotstash) { 512 mat->assembled = PETSC_FALSE; 513 if (roworiented) { 514 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 515 } else { 516 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 517 } 518 } 519 } 520 } 521 PetscFunctionReturn(0); 522 } 523 524 /* 525 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 526 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 527 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 528 */ 529 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[]) 530 { 531 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 532 Mat A = aij->A; /* diagonal part of the matrix */ 533 Mat B = aij->B; /* offdiagonal part of the matrix */ 534 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 535 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 536 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col; 537 PetscInt *ailen = a->ilen,*aj = a->j; 538 PetscInt *bilen = b->ilen,*bj = b->j; 539 PetscInt am = aij->A->rmap->n,j; 540 PetscInt diag_so_far = 0,dnz; 541 PetscInt offd_so_far = 0,onz; 542 543 PetscFunctionBegin; 544 /* Iterate over all rows of the matrix */ 545 for (j=0; j<am; j++) { 546 dnz = onz = 0; 547 /* Iterate over all non-zero columns of the current row */ 548 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 549 /* If column is in the diagonal */ 550 if (mat_j[col] >= cstart && mat_j[col] < cend) { 551 aj[diag_so_far++] = mat_j[col] - cstart; 552 dnz++; 553 } else { /* off-diagonal entries */ 554 bj[offd_so_far++] = mat_j[col]; 555 onz++; 556 } 557 } 558 ailen[j] = dnz; 559 bilen[j] = onz; 560 } 561 PetscFunctionReturn(0); 562 } 563 564 /* 565 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 566 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 567 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 568 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 569 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 570 */ 571 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[]) 572 { 573 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 574 Mat A = aij->A; /* diagonal part of the matrix */ 575 Mat B = aij->B; /* offdiagonal part of the matrix */ 576 Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data; 577 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 578 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 579 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 580 PetscInt *ailen = a->ilen,*aj = a->j; 581 PetscInt *bilen = b->ilen,*bj = b->j; 582 PetscInt am = aij->A->rmap->n,j; 583 PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. 
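     As an example of the split performed in the loop below (with made-up numbers): on a process
     owning columns [cstart,cend) = [10,20), a CSR row with global columns {3, 12, 25} stores 12 in
     the diagonal block as local column 12-cstart = 2, while 3 and 25 are stored, with their global
     indices, in the off-diagonal block.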
*/ 584 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 585 PetscScalar *aa = a->a,*ba = b->a; 586 587 PetscFunctionBegin; 588 /* Iterate over all rows of the matrix */ 589 for (j=0; j<am; j++) { 590 dnz_row = onz_row = 0; 591 rowstart_offd = full_offd_i[j]; 592 rowstart_diag = full_diag_i[j]; 593 /* Iterate over all non-zero columns of the current row */ 594 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 595 /* If column is in the diagonal */ 596 if (mat_j[col] >= cstart && mat_j[col] < cend) { 597 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 598 aa[rowstart_diag+dnz_row] = mat_a[col]; 599 dnz_row++; 600 } else { /* off-diagonal entries */ 601 bj[rowstart_offd+onz_row] = mat_j[col]; 602 ba[rowstart_offd+onz_row] = mat_a[col]; 603 onz_row++; 604 } 605 } 606 ailen[j] = dnz_row; 607 bilen[j] = onz_row; 608 } 609 PetscFunctionReturn(0); 610 } 611 612 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 613 { 614 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 615 PetscErrorCode ierr; 616 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 617 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 618 619 PetscFunctionBegin; 620 for (i=0; i<m; i++) { 621 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 622 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 623 if (idxm[i] >= rstart && idxm[i] < rend) { 624 row = idxm[i] - rstart; 625 for (j=0; j<n; j++) { 626 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 627 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 628 if (idxn[j] >= cstart && idxn[j] < cend) { 629 col = idxn[j] - cstart; 630 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 631 } else { 632 if (!aij->colmap) { 633 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 634 } 635 #if defined(PETSC_USE_CTABLE) 636 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 637 col--; 638 #else 639 col = aij->colmap[idxn[j]] - 1; 640 #endif 641 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 642 else { 643 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 644 } 645 } 646 } 647 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 648 } 649 PetscFunctionReturn(0); 650 } 651 652 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 653 { 654 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 655 PetscErrorCode ierr; 656 PetscInt nstash,reallocs; 657 658 PetscFunctionBegin; 659 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 660 661 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 662 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 663 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 664 PetscFunctionReturn(0); 665 } 666 667 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 668 { 669 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 670 PetscErrorCode ierr; 671 PetscMPIInt n; 672 PetscInt i,j,rstart,ncols,flg; 673 PetscInt *row,*col; 674 PetscBool other_disassembled; 675 PetscScalar *val; 676 677 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be 
reset in disassembly */ 678 679 PetscFunctionBegin; 680 if (!aij->donotstash && !mat->nooffprocentries) { 681 while (1) { 682 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 683 if (!flg) break; 684 685 for (i=0; i<n;) { 686 /* Now identify the consecutive vals belonging to the same row */ 687 for (j=i,rstart=row[j]; j<n; j++) { 688 if (row[j] != rstart) break; 689 } 690 if (j < n) ncols = j-i; 691 else ncols = n-i; 692 /* Now assemble all these values with a single function call */ 693 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 694 i = j; 695 } 696 } 697 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 698 } 699 #if defined(PETSC_HAVE_DEVICE) 700 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 701 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 702 if (mat->boundtocpu) { 703 ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr); 704 ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr); 705 } 706 #endif 707 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 708 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 709 710 /* determine if any processor has disassembled, if so we must 711 also disassemble ourself, in order that we may reassemble. */ 712 /* 713 if nonzero structure of submatrix B cannot change then we know that 714 no processor disassembled thus we can skip this stuff 715 */ 716 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 717 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 718 if (mat->was_assembled && !other_disassembled) { 719 #if defined(PETSC_HAVE_DEVICE) 720 aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */ 721 #endif 722 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 723 } 724 } 725 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 726 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 727 } 728 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 729 #if defined(PETSC_HAVE_DEVICE) 730 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 731 #endif 732 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 733 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 734 735 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 736 737 aij->rowvalues = NULL; 738 739 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 740 741 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 742 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 743 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 744 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 745 } 746 #if defined(PETSC_HAVE_DEVICE) 747 mat->offloadmask = PETSC_OFFLOAD_BOTH; 748 #endif 749 PetscFunctionReturn(0); 750 } 751 752 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 753 { 754 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 755 PetscErrorCode ierr; 756 757 PetscFunctionBegin; 758 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 759 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 760 PetscFunctionReturn(0); 761 } 762 763 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt 
rows[],PetscScalar diag,Vec x,Vec b) 764 { 765 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 766 PetscObjectState sA, sB; 767 PetscInt *lrows; 768 PetscInt r, len; 769 PetscBool cong, lch, gch; 770 PetscErrorCode ierr; 771 772 PetscFunctionBegin; 773 /* get locally owned rows */ 774 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 775 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 776 /* fix right hand side if needed */ 777 if (x && b) { 778 const PetscScalar *xx; 779 PetscScalar *bb; 780 781 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 782 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 783 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 784 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 785 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 786 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 787 } 788 789 sA = mat->A->nonzerostate; 790 sB = mat->B->nonzerostate; 791 792 if (diag != 0.0 && cong) { 793 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 794 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 795 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 796 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 797 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 798 PetscInt nnwA, nnwB; 799 PetscBool nnzA, nnzB; 800 801 nnwA = aijA->nonew; 802 nnwB = aijB->nonew; 803 nnzA = aijA->keepnonzeropattern; 804 nnzB = aijB->keepnonzeropattern; 805 if (!nnzA) { 806 ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr); 807 aijA->nonew = 0; 808 } 809 if (!nnzB) { 810 ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr); 811 aijB->nonew = 0; 812 } 813 /* Must zero here before the next loop */ 814 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 815 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 816 for (r = 0; r < len; ++r) { 817 const PetscInt row = lrows[r] + A->rmap->rstart; 818 if (row >= A->cmap->N) continue; 819 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 820 } 821 aijA->nonew = nnwA; 822 aijB->nonew = nnwB; 823 } else { 824 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 825 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 826 } 827 ierr = PetscFree(lrows);CHKERRQ(ierr); 828 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 829 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 830 831 /* reduce nonzerostate */ 832 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 833 ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 834 if (gch) A->nonzerostate++; 835 PetscFunctionReturn(0); 836 } 837 838 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 839 { 840 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 841 PetscErrorCode ierr; 842 PetscMPIInt n = A->rmap->n; 843 PetscInt i,j,r,m,len = 0; 844 PetscInt *lrows,*owners = A->rmap->range; 845 PetscMPIInt p = 0; 846 PetscSFNode *rrows; 847 PetscSF sf; 848 const PetscScalar *xx; 849 PetscScalar *bb,*mask; 850 Vec xmask,lmask; 851 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 852 const PetscInt *aj, 
*ii,*ridx; 853 PetscScalar *aa; 854 855 PetscFunctionBegin; 856 /* Create SF where leaves are input rows and roots are owned rows */ 857 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 858 for (r = 0; r < n; ++r) lrows[r] = -1; 859 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 860 for (r = 0; r < N; ++r) { 861 const PetscInt idx = rows[r]; 862 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 863 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 864 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 865 } 866 rrows[r].rank = p; 867 rrows[r].index = rows[r] - owners[p]; 868 } 869 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 870 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 871 /* Collect flags for rows to be zeroed */ 872 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 873 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 874 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 875 /* Compress and put in row numbers */ 876 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 877 /* zero diagonal part of matrix */ 878 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 879 /* handle off diagonal part of matrix */ 880 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 881 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 882 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 883 for (i=0; i<len; i++) bb[lrows[i]] = 1; 884 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 885 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 886 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 887 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 888 if (x && b) { /* this code is buggy when the row and column layout don't match */ 889 PetscBool cong; 890 891 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 892 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 893 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 894 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 895 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 896 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 897 } 898 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 899 /* remove zeroed rows of off diagonal matrix */ 900 ii = aij->i; 901 for (i=0; i<len; i++) { 902 ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr); 903 } 904 /* loop over all elements of off process part of matrix zeroing removed columns*/ 905 if (aij->compressedrow.use) { 906 m = aij->compressedrow.nrows; 907 ii = aij->compressedrow.i; 908 ridx = aij->compressedrow.rindex; 909 for (i=0; i<m; i++) { 910 n = ii[i+1] - ii[i]; 911 aj = aij->j + ii[i]; 912 aa = aij->a + ii[i]; 913 914 for (j=0; j<n; j++) { 915 if (PetscAbsScalar(mask[*aj])) { 916 if (b) bb[*ridx] -= *aa*xx[*aj]; 917 *aa = 0.0; 918 } 919 aa++; 920 aj++; 921 } 922 ridx++; 923 } 924 } else { /* do not use compressed row format */ 925 m = l->B->rmap->n; 926 for (i=0; i<m; i++) { 927 n = ii[i+1] - ii[i]; 928 aj = aij->j + ii[i]; 929 aa = aij->a + ii[i]; 930 for (j=0; j<n; j++) { 931 if (PetscAbsScalar(mask[*aj])) { 932 if (b) bb[i] -= *aa*xx[*aj]; 933 *aa = 0.0; 934 } 935 aa++; 936 aj++; 937 } 938 } 
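  /* In both branches above, every off-diagonal entry a_ij whose column j has been zeroed (mask nonzero)
     is first folded into the right-hand side (when b is provided) by subtracting a_ij*xx[j] from the
     entry of b for that row, and is then set to 0.0, keeping b consistent with the solution values
     prescribed in x for the eliminated columns. */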
939 } 940 if (x && b) { 941 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 942 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 943 } 944 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 945 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 946 ierr = PetscFree(lrows);CHKERRQ(ierr); 947 948 /* only change matrix nonzero state if pattern was allowed to be changed */ 949 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 950 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 951 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 952 } 953 PetscFunctionReturn(0); 954 } 955 956 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 957 { 958 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 959 PetscErrorCode ierr; 960 PetscInt nt; 961 VecScatter Mvctx = a->Mvctx; 962 963 PetscFunctionBegin; 964 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 965 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 966 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 967 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 968 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 969 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 970 PetscFunctionReturn(0); 971 } 972 973 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 974 { 975 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 976 PetscErrorCode ierr; 977 978 PetscFunctionBegin; 979 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 980 PetscFunctionReturn(0); 981 } 982 983 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 984 { 985 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 986 PetscErrorCode ierr; 987 VecScatter Mvctx = a->Mvctx; 988 989 PetscFunctionBegin; 990 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 991 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 992 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 993 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 994 PetscFunctionReturn(0); 995 } 996 997 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 998 { 999 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1000 PetscErrorCode ierr; 1001 1002 PetscFunctionBegin; 1003 /* do nondiagonal part */ 1004 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1005 /* do local part */ 1006 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1007 /* add partial results together */ 1008 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1009 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1010 PetscFunctionReturn(0); 1011 } 1012 1013 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1014 { 1015 MPI_Comm comm; 1016 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1017 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1018 IS Me,Notme; 1019 PetscErrorCode ierr; 1020 PetscInt M,N,first,last,*notme,i; 1021 PetscBool lf; 1022 PetscMPIInt size; 1023 1024 PetscFunctionBegin; 1025 /* Easy test: symmetric diagonal block */ 1026 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1027 ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 1028 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr); 1029 if (!*f) PetscFunctionReturn(0); 1030 ierr = 
PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1031 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 1032 if (size == 1) PetscFunctionReturn(0); 1033 1034 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1035 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1036 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1037 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1038 for (i=0; i<first; i++) notme[i] = i; 1039 for (i=last; i<M; i++) notme[i-last+first] = i; 1040 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1041 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1042 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1043 Aoff = Aoffs[0]; 1044 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1045 Boff = Boffs[0]; 1046 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1047 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1048 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1049 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1050 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1051 ierr = PetscFree(notme);CHKERRQ(ierr); 1052 PetscFunctionReturn(0); 1053 } 1054 1055 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1056 { 1057 PetscErrorCode ierr; 1058 1059 PetscFunctionBegin; 1060 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1061 PetscFunctionReturn(0); 1062 } 1063 1064 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1065 { 1066 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1067 PetscErrorCode ierr; 1068 1069 PetscFunctionBegin; 1070 /* do nondiagonal part */ 1071 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1072 /* do local part */ 1073 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1074 /* add partial results together */ 1075 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1076 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1077 PetscFunctionReturn(0); 1078 } 1079 1080 /* 1081 This only works correctly for square matrices where the subblock A->A is the 1082 diagonal block 1083 */ 1084 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1085 { 1086 PetscErrorCode ierr; 1087 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1088 1089 PetscFunctionBegin; 1090 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1091 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1092 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1093 PetscFunctionReturn(0); 1094 } 1095 1096 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1097 { 1098 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1099 PetscErrorCode ierr; 1100 1101 PetscFunctionBegin; 1102 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1103 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1104 PetscFunctionReturn(0); 1105 } 1106 1107 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1108 { 1109 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1110 PetscErrorCode ierr; 1111 1112 PetscFunctionBegin; 1113 #if defined(PETSC_USE_LOG) 1114 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1115 #endif 1116 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1117 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1118 ierr = 
MatDestroy(&aij->A);CHKERRQ(ierr); 1119 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1120 #if defined(PETSC_USE_CTABLE) 1121 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1122 #else 1123 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1124 #endif 1125 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1126 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1127 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1128 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1129 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1130 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1131 1132 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 1133 ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr); 1134 1135 ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr); 1136 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1137 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1138 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1139 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1140 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1141 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1142 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1143 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr); 1144 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1145 #if defined(PETSC_HAVE_CUDA) 1146 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr); 1147 #endif 1148 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 1149 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr); 1150 #endif 1151 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 1152 #if defined(PETSC_HAVE_ELEMENTAL) 1153 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1154 #endif 1155 #if defined(PETSC_HAVE_SCALAPACK) 1156 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr); 1157 #endif 1158 #if defined(PETSC_HAVE_HYPRE) 1159 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1160 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1161 #endif 1162 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1163 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr); 1164 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1165 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr); 1166 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr); 1167 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr); 1168 #if defined(PETSC_HAVE_MKL_SPARSE) 1169 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr); 1170 #endif 1171 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr); 1172 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1173 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr); 1174 PetscFunctionReturn(0); 1175 } 1176 1177 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1178 { 1179 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1180 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1181 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1182 const PetscInt *garray = aij->garray; 1183 const PetscScalar *aa,*ba; 1184 PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb; 1185 PetscInt *rowlens; 1186 PetscInt *colidxs; 1187 PetscScalar *matvals; 1188 PetscErrorCode ierr; 1189 1190 PetscFunctionBegin; 1191 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1192 1193 M = mat->rmap->N; 1194 N = mat->cmap->N; 1195 m = mat->rmap->n; 1196 rs = mat->rmap->rstart; 1197 cs = mat->cmap->rstart; 1198 nz = A->nz + B->nz; 1199 1200 /* write matrix header */ 1201 header[0] = MAT_FILE_CLASSID; 1202 header[1] = M; header[2] = N; header[3] = nz; 1203 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1204 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1205 1206 /* fill in and store row lengths */ 1207 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1208 for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1209 ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr); 1210 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1211 1212 /* fill in and store column indices */ 1213 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1214 for (cnt=0, i=0; i<m; i++) { 1215 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1216 if (garray[B->j[jb]] > cs) break; 1217 colidxs[cnt++] = garray[B->j[jb]]; 1218 } 1219 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1220 colidxs[cnt++] = A->j[ja] + cs; 1221 for (; jb<B->i[i+1]; jb++) 1222 colidxs[cnt++] = garray[B->j[jb]]; 1223 } 1224 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1225 ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 1226 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1227 1228 /* fill in and store nonzero values */ 1229 ierr = MatSeqAIJGetArrayRead(aij->A,&aa);CHKERRQ(ierr); 1230 ierr = MatSeqAIJGetArrayRead(aij->B,&ba);CHKERRQ(ierr); 1231 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1232 for (cnt=0, i=0; i<m; i++) { 1233 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1234 if (garray[B->j[jb]] > cs) break; 1235 matvals[cnt++] = ba[jb]; 1236 } 1237 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1238 matvals[cnt++] = aa[ja]; 1239 for (; jb<B->i[i+1]; jb++) 1240 matvals[cnt++] = ba[jb]; 1241 } 1242 ierr = MatSeqAIJRestoreArrayRead(aij->A,&aa);CHKERRQ(ierr); 1243 ierr = MatSeqAIJRestoreArrayRead(aij->B,&ba);CHKERRQ(ierr); 1244 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1245 ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 1246 ierr = PetscFree(matvals);CHKERRQ(ierr); 1247 1248 /* write block size option to the viewer's .info file */ 1249 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1250 PetscFunctionReturn(0); 
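  /* Summary of the binary layout written above: a four-entry header (MAT_FILE_CLASSID, M, N, global
     number of nonzeros), then the per-row nonzero counts, then the global column indices (per row:
     the off-diagonal entries left of the diagonal block, the diagonal block, then the remaining
     off-diagonal entries), then the matching nonzero values. A file written this way can be read
     back with MatLoad(). */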
1251 } 1252 1253 #include <petscdraw.h> 1254 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1255 { 1256 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1257 PetscErrorCode ierr; 1258 PetscMPIInt rank = aij->rank,size = aij->size; 1259 PetscBool isdraw,iascii,isbinary; 1260 PetscViewer sviewer; 1261 PetscViewerFormat format; 1262 1263 PetscFunctionBegin; 1264 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1265 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1266 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1267 if (iascii) { 1268 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1269 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1270 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1271 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1272 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1273 for (i=0; i<(PetscInt)size; i++) { 1274 nmax = PetscMax(nmax,nz[i]); 1275 nmin = PetscMin(nmin,nz[i]); 1276 navg += nz[i]; 1277 } 1278 ierr = PetscFree(nz);CHKERRQ(ierr); 1279 navg = navg/size; 1280 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1281 PetscFunctionReturn(0); 1282 } 1283 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1284 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1285 MatInfo info; 1286 PetscInt *inodes=NULL; 1287 1288 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr); 1289 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1290 ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr); 1291 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1292 if (!inodes) { 1293 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1294 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1295 } else { 1296 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1297 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1298 } 1299 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1300 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1301 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1302 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1303 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1304 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1305 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1306 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1307 PetscFunctionReturn(0); 1308 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1309 PetscInt inodecount,inodelimit,*inodes; 1310 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1311 if (inodes) { 1312 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1313 } else { 1314 ierr = 
PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1315 } 1316 PetscFunctionReturn(0); 1317 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1318 PetscFunctionReturn(0); 1319 } 1320 } else if (isbinary) { 1321 if (size == 1) { 1322 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1323 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1324 } else { 1325 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1326 } 1327 PetscFunctionReturn(0); 1328 } else if (iascii && size == 1) { 1329 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1330 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1331 PetscFunctionReturn(0); 1332 } else if (isdraw) { 1333 PetscDraw draw; 1334 PetscBool isnull; 1335 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1336 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1337 if (isnull) PetscFunctionReturn(0); 1338 } 1339 1340 { /* assemble the entire matrix onto first processor */ 1341 Mat A = NULL, Av; 1342 IS isrow,iscol; 1343 1344 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1345 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1346 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1347 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1348 /* The commented code uses MatCreateSubMatrices instead */ 1349 /* 1350 Mat *AA, A = NULL, Av; 1351 IS isrow,iscol; 1352 1353 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1354 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1355 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1356 if (!rank) { 1357 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1358 A = AA[0]; 1359 Av = AA[0]; 1360 } 1361 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1362 */ 1363 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1364 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1365 /* 1366 Everyone has to call to draw the matrix since the graphics waits are 1367 synchronized across all processors that share the PetscDraw object 1368 */ 1369 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1370 if (!rank) { 1371 if (((PetscObject)mat)->name) { 1372 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1373 } 1374 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1375 } 1376 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1377 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1378 ierr = MatDestroy(&A);CHKERRQ(ierr); 1379 } 1380 PetscFunctionReturn(0); 1381 } 1382 1383 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1384 { 1385 PetscErrorCode ierr; 1386 PetscBool iascii,isdraw,issocket,isbinary; 1387 1388 PetscFunctionBegin; 1389 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1390 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1391 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1392 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1393 if (iascii || isdraw || isbinary || issocket) { 1394 ierr = 
MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1395 } 1396 PetscFunctionReturn(0); 1397 } 1398 1399 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1400 { 1401 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1402 PetscErrorCode ierr; 1403 Vec bb1 = NULL; 1404 PetscBool hasop; 1405 1406 PetscFunctionBegin; 1407 if (flag == SOR_APPLY_UPPER) { 1408 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1409 PetscFunctionReturn(0); 1410 } 1411 1412 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1413 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1414 } 1415 1416 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1417 if (flag & SOR_ZERO_INITIAL_GUESS) { 1418 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1419 its--; 1420 } 1421 1422 while (its--) { 1423 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1424 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1425 1426 /* update rhs: bb1 = bb - B*x */ 1427 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1428 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1429 1430 /* local sweep */ 1431 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1432 } 1433 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1434 if (flag & SOR_ZERO_INITIAL_GUESS) { 1435 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1436 its--; 1437 } 1438 while (its--) { 1439 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1440 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1441 1442 /* update rhs: bb1 = bb - B*x */ 1443 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1444 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1445 1446 /* local sweep */ 1447 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1448 } 1449 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1450 if (flag & SOR_ZERO_INITIAL_GUESS) { 1451 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1452 its--; 1453 } 1454 while (its--) { 1455 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1456 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1457 1458 /* update rhs: bb1 = bb - B*x */ 1459 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1460 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1461 1462 /* local sweep */ 1463 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1464 } 1465 } else if (flag & SOR_EISENSTAT) { 1466 Vec xx1; 1467 1468 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1469 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1470 1471 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1472 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1473 if (!mat->diag) { 1474 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1475 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1476 } 1477 ierr = 
MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1478 if (hasop) { 1479 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1480 } else { 1481 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1482 } 1483 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1484 1485 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1486 1487 /* local sweep */ 1488 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1489 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1490 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1491 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1492 1493 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1494 1495 matin->factorerrortype = mat->A->factorerrortype; 1496 PetscFunctionReturn(0); 1497 } 1498 1499 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1500 { 1501 Mat aA,aB,Aperm; 1502 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1503 PetscScalar *aa,*ba; 1504 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1505 PetscSF rowsf,sf; 1506 IS parcolp = NULL; 1507 PetscBool done; 1508 PetscErrorCode ierr; 1509 1510 PetscFunctionBegin; 1511 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1512 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1513 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1514 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1515 1516 /* Invert row permutation to find out where my rows should go */ 1517 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1518 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1519 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1520 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1521 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1522 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1523 1524 /* Invert column permutation to find out where my columns should go */ 1525 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1526 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1527 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1528 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1529 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1530 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1531 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1532 1533 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1534 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1535 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1536 1537 /* Find out where my gcols should go */ 1538 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1539 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1540 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1541 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1542 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1543 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr); 1544 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr); 1545 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1546 1547 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1548 ierr = 
MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1549 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1550 for (i=0; i<m; i++) { 1551 PetscInt row = rdest[i]; 1552 PetscMPIInt rowner; 1553 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1554 for (j=ai[i]; j<ai[i+1]; j++) { 1555 PetscInt col = cdest[aj[j]]; 1556 PetscMPIInt cowner; 1557 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1558 if (rowner == cowner) dnnz[i]++; 1559 else onnz[i]++; 1560 } 1561 for (j=bi[i]; j<bi[i+1]; j++) { 1562 PetscInt col = gcdest[bj[j]]; 1563 PetscMPIInt cowner; 1564 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1565 if (rowner == cowner) dnnz[i]++; 1566 else onnz[i]++; 1567 } 1568 } 1569 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr); 1570 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr); 1571 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr); 1572 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr); 1573 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1574 1575 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1576 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1577 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1578 for (i=0; i<m; i++) { 1579 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1580 PetscInt j0,rowlen; 1581 rowlen = ai[i+1] - ai[i]; 1582 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1583 for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1584 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1585 } 1586 rowlen = bi[i+1] - bi[i]; 1587 for (j0=j=0; j<rowlen; j0=j) { 1588 for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1589 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1590 } 1591 } 1592 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1593 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1594 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1595 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1596 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1597 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1598 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1599 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1600 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1601 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1602 *B = Aperm; 1603 PetscFunctionReturn(0); 1604 } 1605 1606 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1607 { 1608 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1609 PetscErrorCode ierr; 1610 1611 PetscFunctionBegin; 1612 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1613 if (ghosts) *ghosts = aij->garray; 1614 PetscFunctionReturn(0); 1615 } 1616 1617 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1618 { 1619 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1620 Mat A = mat->A,B = mat->B; 1621 PetscErrorCode ierr; 1622 PetscLogDouble isend[5],irecv[5]; 1623 1624 PetscFunctionBegin; 1625 info->block_size = 1.0; 1626 ierr = 
MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1627 1628 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1629 isend[3] = info->memory; isend[4] = info->mallocs; 1630 1631 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1632 1633 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1634 isend[3] += info->memory; isend[4] += info->mallocs; 1635 if (flag == MAT_LOCAL) { 1636 info->nz_used = isend[0]; 1637 info->nz_allocated = isend[1]; 1638 info->nz_unneeded = isend[2]; 1639 info->memory = isend[3]; 1640 info->mallocs = isend[4]; 1641 } else if (flag == MAT_GLOBAL_MAX) { 1642 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1643 1644 info->nz_used = irecv[0]; 1645 info->nz_allocated = irecv[1]; 1646 info->nz_unneeded = irecv[2]; 1647 info->memory = irecv[3]; 1648 info->mallocs = irecv[4]; 1649 } else if (flag == MAT_GLOBAL_SUM) { 1650 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1651 1652 info->nz_used = irecv[0]; 1653 info->nz_allocated = irecv[1]; 1654 info->nz_unneeded = irecv[2]; 1655 info->memory = irecv[3]; 1656 info->mallocs = irecv[4]; 1657 } 1658 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1659 info->fill_ratio_needed = 0; 1660 info->factor_mallocs = 0; 1661 PetscFunctionReturn(0); 1662 } 1663 1664 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1665 { 1666 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1667 PetscErrorCode ierr; 1668 1669 PetscFunctionBegin; 1670 switch (op) { 1671 case MAT_NEW_NONZERO_LOCATIONS: 1672 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1673 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1674 case MAT_KEEP_NONZERO_PATTERN: 1675 case MAT_NEW_NONZERO_LOCATION_ERR: 1676 case MAT_USE_INODES: 1677 case MAT_IGNORE_ZERO_ENTRIES: 1678 case MAT_FORM_EXPLICIT_TRANSPOSE: 1679 MatCheckPreallocated(A,1); 1680 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1681 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1682 break; 1683 case MAT_ROW_ORIENTED: 1684 MatCheckPreallocated(A,1); 1685 a->roworiented = flg; 1686 1687 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1688 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1689 break; 1690 case MAT_FORCE_DIAGONAL_ENTRIES: 1691 case MAT_SORTED_FULL: 1692 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1693 break; 1694 case MAT_IGNORE_OFF_PROC_ENTRIES: 1695 a->donotstash = flg; 1696 break; 1697 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1698 case MAT_SPD: 1699 case MAT_SYMMETRIC: 1700 case MAT_STRUCTURALLY_SYMMETRIC: 1701 case MAT_HERMITIAN: 1702 case MAT_SYMMETRY_ETERNAL: 1703 break; 1704 case MAT_SUBMAT_SINGLEIS: 1705 A->submat_singleis = flg; 1706 break; 1707 case MAT_STRUCTURE_ONLY: 1708 /* The option is handled directly by MatSetOption() */ 1709 break; 1710 default: 1711 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1712 } 1713 PetscFunctionReturn(0); 1714 } 1715 1716 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1717 { 1718 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1719 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1720 PetscErrorCode ierr; 1721 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1722 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1723 PetscInt *cmap,*idx_p; 1724 1725 
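  /*
     MatGetRow_MPIAIJ() implements MatGetRow() for MPIAIJ: the requested local row is assembled by
     merging the corresponding rows of the diagonal block (A) and the off-diagonal block (B), with
     B's compressed column indices mapped back to global numbering through garray, so the caller
     sees a single list sorted by increasing global column. A minimal caller-side sketch (usage
     only; the variable names below are illustrative, not part of this file):

         PetscInt          ncols;
         const PetscInt    *cols;
         const PetscScalar *vals;
         ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
         ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);

     Only locally owned rows (rstart <= row < rend) may be requested, and only one row may be
     active at a time (see the getrowactive guard below).
  */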
PetscFunctionBegin; 1726 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1727 mat->getrowactive = PETSC_TRUE; 1728 1729 if (!mat->rowvalues && (idx || v)) { 1730 /* 1731 allocate enough space to hold information from the longest row. 1732 */ 1733 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1734 PetscInt max = 1,tmp; 1735 for (i=0; i<matin->rmap->n; i++) { 1736 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1737 if (max < tmp) max = tmp; 1738 } 1739 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1740 } 1741 1742 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1743 lrow = row - rstart; 1744 1745 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1746 if (!v) {pvA = NULL; pvB = NULL;} 1747 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1748 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1749 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1750 nztot = nzA + nzB; 1751 1752 cmap = mat->garray; 1753 if (v || idx) { 1754 if (nztot) { 1755 /* Sort by increasing column numbers, assuming A and B already sorted */ 1756 PetscInt imark = -1; 1757 if (v) { 1758 *v = v_p = mat->rowvalues; 1759 for (i=0; i<nzB; i++) { 1760 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1761 else break; 1762 } 1763 imark = i; 1764 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1765 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1766 } 1767 if (idx) { 1768 *idx = idx_p = mat->rowindices; 1769 if (imark > -1) { 1770 for (i=0; i<imark; i++) { 1771 idx_p[i] = cmap[cworkB[i]]; 1772 } 1773 } else { 1774 for (i=0; i<nzB; i++) { 1775 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1776 else break; 1777 } 1778 imark = i; 1779 } 1780 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1781 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1782 } 1783 } else { 1784 if (idx) *idx = NULL; 1785 if (v) *v = NULL; 1786 } 1787 } 1788 *nz = nztot; 1789 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1790 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1791 PetscFunctionReturn(0); 1792 } 1793 1794 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1795 { 1796 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1797 1798 PetscFunctionBegin; 1799 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1800 aij->getrowactive = PETSC_FALSE; 1801 PetscFunctionReturn(0); 1802 } 1803 1804 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1805 { 1806 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1807 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1808 PetscErrorCode ierr; 1809 PetscInt i,j,cstart = mat->cmap->rstart; 1810 PetscReal sum = 0.0; 1811 MatScalar *v; 1812 1813 PetscFunctionBegin; 1814 if (aij->size == 1) { 1815 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1816 } else { 1817 if (type == NORM_FROBENIUS) { 1818 v = amat->a; 1819 for (i=0; i<amat->nz; i++) { 1820 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1821 } 1822 v = bmat->a; 1823 for (i=0; i<bmat->nz; i++) { 1824 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1825 } 1826 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1827 *norm = PetscSqrtReal(*norm); 1828 ierr = 
PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr); 1829 } else if (type == NORM_1) { /* max column norm */ 1830 PetscReal *tmp,*tmp2; 1831 PetscInt *jj,*garray = aij->garray; 1832 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1833 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1834 *norm = 0.0; 1835 v = amat->a; jj = amat->j; 1836 for (j=0; j<amat->nz; j++) { 1837 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1838 } 1839 v = bmat->a; jj = bmat->j; 1840 for (j=0; j<bmat->nz; j++) { 1841 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1842 } 1843 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1844 for (j=0; j<mat->cmap->N; j++) { 1845 if (tmp2[j] > *norm) *norm = tmp2[j]; 1846 } 1847 ierr = PetscFree(tmp);CHKERRQ(ierr); 1848 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1849 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1850 } else if (type == NORM_INFINITY) { /* max row norm */ 1851 PetscReal ntemp = 0.0; 1852 for (j=0; j<aij->A->rmap->n; j++) { 1853 v = amat->a + amat->i[j]; 1854 sum = 0.0; 1855 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1856 sum += PetscAbsScalar(*v); v++; 1857 } 1858 v = bmat->a + bmat->i[j]; 1859 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1860 sum += PetscAbsScalar(*v); v++; 1861 } 1862 if (sum > ntemp) ntemp = sum; 1863 } 1864 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1865 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1866 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1867 } 1868 PetscFunctionReturn(0); 1869 } 1870 1871 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1872 { 1873 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 1874 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 1875 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 1876 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 1877 PetscErrorCode ierr; 1878 Mat B,A_diag,*B_diag; 1879 const MatScalar *pbv,*bv; 1880 1881 PetscFunctionBegin; 1882 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1883 ai = Aloc->i; aj = Aloc->j; 1884 bi = Bloc->i; bj = Bloc->j; 1885 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1886 PetscInt *d_nnz,*g_nnz,*o_nnz; 1887 PetscSFNode *oloc; 1888 PETSC_UNUSED PetscSF sf; 1889 1890 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1891 /* compute d_nnz for preallocation */ 1892 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 1893 for (i=0; i<ai[ma]; i++) { 1894 d_nnz[aj[i]]++; 1895 } 1896 /* compute local off-diagonal contributions */ 1897 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 1898 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1899 /* map those to global */ 1900 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1901 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1902 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1903 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 1904 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1905 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1906 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1907 1908 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1909 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1910 ierr = 
MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1911 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1912 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1913 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1914 } else { 1915 B = *matout; 1916 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1917 } 1918 1919 b = (Mat_MPIAIJ*)B->data; 1920 A_diag = a->A; 1921 B_diag = &b->A; 1922 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 1923 A_diag_ncol = A_diag->cmap->N; 1924 B_diag_ilen = sub_B_diag->ilen; 1925 B_diag_i = sub_B_diag->i; 1926 1927 /* Set ilen for diagonal of B */ 1928 for (i=0; i<A_diag_ncol; i++) { 1929 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 1930 } 1931 1932 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 1933 very quickly (=without using MatSetValues), because all writes are local. */ 1934 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 1935 1936 /* copy over the B part */ 1937 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 1938 ierr = MatSeqAIJGetArrayRead(a->B,&bv);CHKERRQ(ierr); 1939 pbv = bv; 1940 row = A->rmap->rstart; 1941 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1942 cols_tmp = cols; 1943 for (i=0; i<mb; i++) { 1944 ncol = bi[i+1]-bi[i]; 1945 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES);CHKERRQ(ierr); 1946 row++; 1947 pbv += ncol; cols_tmp += ncol; 1948 } 1949 ierr = PetscFree(cols);CHKERRQ(ierr); 1950 ierr = MatSeqAIJRestoreArrayRead(a->B,&bv);CHKERRQ(ierr); 1951 1952 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1953 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1954 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1955 *matout = B; 1956 } else { 1957 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 1958 } 1959 PetscFunctionReturn(0); 1960 } 1961 1962 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1963 { 1964 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1965 Mat a = aij->A,b = aij->B; 1966 PetscErrorCode ierr; 1967 PetscInt s1,s2,s3; 1968 1969 PetscFunctionBegin; 1970 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1971 if (rr) { 1972 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1973 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1974 /* Overlap communication with computation. 
*/ 1975 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1976 } 1977 if (ll) { 1978 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1979 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1980 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1981 } 1982 /* scale the diagonal block */ 1983 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1984 1985 if (rr) { 1986 /* Do a scatter end and then right scale the off-diagonal block */ 1987 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1988 ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr); 1989 } 1990 PetscFunctionReturn(0); 1991 } 1992 1993 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1994 { 1995 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1996 PetscErrorCode ierr; 1997 1998 PetscFunctionBegin; 1999 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2000 PetscFunctionReturn(0); 2001 } 2002 2003 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2004 { 2005 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2006 Mat a,b,c,d; 2007 PetscBool flg; 2008 PetscErrorCode ierr; 2009 2010 PetscFunctionBegin; 2011 a = matA->A; b = matA->B; 2012 c = matB->A; d = matB->B; 2013 2014 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2015 if (flg) { 2016 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2017 } 2018 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2019 PetscFunctionReturn(0); 2020 } 2021 2022 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2023 { 2024 PetscErrorCode ierr; 2025 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2026 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2027 2028 PetscFunctionBegin; 2029 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2030 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2031 /* because of the column compression in the off-processor part of the matrix a->B, 2032 the number of columns in a->B and b->B may be different, hence we cannot call 2033 the MatCopy() directly on the two parts. If need be, we can provide a more 2034 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2035 then copying the submatrices */ 2036 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2037 } else { 2038 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2039 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2040 } 2041 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2042 PetscFunctionReturn(0); 2043 } 2044 2045 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2046 { 2047 PetscErrorCode ierr; 2048 2049 PetscFunctionBegin; 2050 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr); 2051 PetscFunctionReturn(0); 2052 } 2053 2054 /* 2055 Computes the number of nonzeros per row needed for preallocation when X and Y 2056 have different nonzero structure. 
2057 */ 2058 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2059 { 2060 PetscInt i,j,k,nzx,nzy; 2061 2062 PetscFunctionBegin; 2063 /* Set the number of nonzeros in the new matrix */ 2064 for (i=0; i<m; i++) { 2065 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2066 nzx = xi[i+1] - xi[i]; 2067 nzy = yi[i+1] - yi[i]; 2068 nnz[i] = 0; 2069 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2070 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2071 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2072 nnz[i]++; 2073 } 2074 for (; k<nzy; k++) nnz[i]++; 2075 } 2076 PetscFunctionReturn(0); 2077 } 2078 2079 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2080 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2081 { 2082 PetscErrorCode ierr; 2083 PetscInt m = Y->rmap->N; 2084 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2085 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2086 2087 PetscFunctionBegin; 2088 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2089 PetscFunctionReturn(0); 2090 } 2091 2092 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2093 { 2094 PetscErrorCode ierr; 2095 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2096 2097 PetscFunctionBegin; 2098 if (str == SAME_NONZERO_PATTERN) { 2099 ierr = MatAXPY(yy->A,a,xx->A,str);CHKERRQ(ierr); 2100 ierr = MatAXPY(yy->B,a,xx->B,str);CHKERRQ(ierr); 2101 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2102 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2103 } else { 2104 Mat B; 2105 PetscInt *nnz_d,*nnz_o; 2106 2107 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2108 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2109 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2110 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2111 ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr); 2112 ierr = MatSetType(B,((PetscObject)Y)->type_name);CHKERRQ(ierr); 2113 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2114 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2115 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2116 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2117 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2118 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2119 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2120 } 2121 PetscFunctionReturn(0); 2122 } 2123 2124 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2125 2126 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2127 { 2128 #if defined(PETSC_USE_COMPLEX) 2129 PetscErrorCode ierr; 2130 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2131 2132 PetscFunctionBegin; 2133 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2134 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2135 #else 2136 PetscFunctionBegin; 2137 #endif 2138 PetscFunctionReturn(0); 2139 } 2140 2141 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2142 { 2143 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2144 PetscErrorCode ierr; 2145 2146 PetscFunctionBegin; 2147 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2148 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2149 
PetscFunctionReturn(0); 2150 } 2151 2152 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2153 { 2154 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2155 PetscErrorCode ierr; 2156 2157 PetscFunctionBegin; 2158 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2159 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2160 PetscFunctionReturn(0); 2161 } 2162 2163 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2164 { 2165 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2166 PetscErrorCode ierr; 2167 PetscInt i,*idxb = NULL,m = A->rmap->n; 2168 PetscScalar *va,*vv; 2169 Vec vB,vA; 2170 const PetscScalar *vb; 2171 2172 PetscFunctionBegin; 2173 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr); 2174 ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr); 2175 2176 ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr); 2177 if (idx) { 2178 for (i=0; i<m; i++) { 2179 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2180 } 2181 } 2182 2183 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr); 2184 ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr); 2185 ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr); 2186 2187 ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr); 2188 ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr); 2189 for (i=0; i<m; i++) { 2190 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2191 vv[i] = vb[i]; 2192 if (idx) idx[i] = a->garray[idxb[i]]; 2193 } else { 2194 vv[i] = va[i]; 2195 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) 2196 idx[i] = a->garray[idxb[i]]; 2197 } 2198 } 2199 ierr = VecRestoreArrayWrite(v,&vv);CHKERRQ(ierr); 2200 ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr); 2201 ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr); 2202 ierr = PetscFree(idxb);CHKERRQ(ierr); 2203 ierr = VecDestroy(&vA);CHKERRQ(ierr); 2204 ierr = VecDestroy(&vB);CHKERRQ(ierr); 2205 PetscFunctionReturn(0); 2206 } 2207 2208 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2209 { 2210 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2211 PetscInt m = A->rmap->n,n = A->cmap->n; 2212 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2213 PetscInt *cmap = mat->garray; 2214 PetscInt *diagIdx, *offdiagIdx; 2215 Vec diagV, offdiagV; 2216 PetscScalar *a, *diagA, *offdiagA; 2217 const PetscScalar *ba,*bav; 2218 PetscInt r,j,col,ncols,*bi,*bj; 2219 PetscErrorCode ierr; 2220 Mat B = mat->B; 2221 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2222 2223 PetscFunctionBegin; 2224 /* When a process holds entire A and other processes have no entry */ 2225 if (A->cmap->N == n) { 2226 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2227 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2228 ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr); 2229 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2230 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2231 PetscFunctionReturn(0); 2232 } else if (n == 0) { 2233 if (m) { 2234 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2235 for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;} 2236 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2237 } 2238 PetscFunctionReturn(0); 2239 } 2240 2241 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2242 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2243 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2244 ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2245 2246 /* Get offdiagIdx[] for implicit 0.0 */ 2247 ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr); 2248 ba = bav; 2249 bi = b->i; 2250 bj = b->j;
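  /* For each local row, the loop below builds the off-diagonal candidate for the row minimum in
     absolute value. If the compressed row of B is dense (it stores every off-process column), the
     first stored entry seeds the candidate; otherwise at least one off-process entry is an implicit
     0.0, so the candidate starts at 0.0 and the search records the global column of the first
     "hole" in cmap (the compressed-to-global column map). The trailing scan over ba/bj then lets
     any stored entry of smaller magnitude replace that candidate before the diagonal and
     off-diagonal candidates are compared. */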
2251 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2252 for (r = 0; r < m; r++) { 2253 ncols = bi[r+1] - bi[r]; 2254 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2255 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2256 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2257 offdiagA[r] = 0.0; 2258 2259 /* Find first hole in the cmap */ 2260 for (j=0; j<ncols; j++) { 2261 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2262 if (col > j && j < cstart) { 2263 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2264 break; 2265 } else if (col > j + n && j >= cstart) { 2266 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2267 break; 2268 } 2269 } 2270 if (j == ncols && ncols < A->cmap->N - n) { 2271 /* a hole is outside compressed Bcols */ 2272 if (ncols == 0) { 2273 if (cstart) { 2274 offdiagIdx[r] = 0; 2275 } else offdiagIdx[r] = cend; 2276 } else { /* ncols > 0 */ 2277 offdiagIdx[r] = cmap[ncols-1] + 1; 2278 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2279 } 2280 } 2281 } 2282 2283 for (j=0; j<ncols; j++) { 2284 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2285 ba++; bj++; 2286 } 2287 } 2288 2289 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2290 ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2291 for (r = 0; r < m; ++r) { 2292 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2293 a[r] = diagA[r]; 2294 if (idx) idx[r] = cstart + diagIdx[r]; 2295 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2296 a[r] = diagA[r]; 2297 if (idx) { 2298 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2299 idx[r] = cstart + diagIdx[r]; 2300 } else idx[r] = offdiagIdx[r]; 2301 } 2302 } else { 2303 a[r] = offdiagA[r]; 2304 if (idx) idx[r] = offdiagIdx[r]; 2305 } 2306 } 2307 ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr); 2308 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2309 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2310 ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2311 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2312 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2313 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2314 PetscFunctionReturn(0); 2315 } 2316 2317 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2318 { 2319 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2320 PetscInt m = A->rmap->n,n = A->cmap->n; 2321 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2322 PetscInt *cmap = mat->garray; 2323 PetscInt *diagIdx, *offdiagIdx; 2324 Vec diagV, offdiagV; 2325 PetscScalar *a, *diagA, *offdiagA; 2326 const PetscScalar *ba,*bav; 2327 PetscInt r,j,col,ncols,*bi,*bj; 2328 PetscErrorCode ierr; 2329 Mat B = mat->B; 2330 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2331 2332 PetscFunctionBegin; 2333 /* When a process holds entire A and other processes have no entry */ 2334 if (A->cmap->N == n) { 2335 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2336 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2337 ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr); 2338 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2339 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2340 PetscFunctionReturn(0); 2341 } else if (n == 0) { 2342 if (m) { 2343 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2344 for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;} 2345 ierr = 
VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2346 } 2347 PetscFunctionReturn(0); 2348 } 2349 2350 ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2351 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2352 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2353 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2354 2355 /* Get offdiagIdx[] for implicit 0.0 */ 2356 ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr); 2357 ba = bav; 2358 bi = b->i; 2359 bj = b->j; 2360 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2361 for (r = 0; r < m; r++) { 2362 ncols = bi[r+1] - bi[r]; 2363 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2364 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2365 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2366 offdiagA[r] = 0.0; 2367 2368 /* Find first hole in the cmap */ 2369 for (j=0; j<ncols; j++) { 2370 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2371 if (col > j && j < cstart) { 2372 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2373 break; 2374 } else if (col > j + n && j >= cstart) { 2375 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2376 break; 2377 } 2378 } 2379 if (j == ncols && ncols < A->cmap->N - n) { 2380 /* a hole is outside compressed Bcols */ 2381 if (ncols == 0) { 2382 if (cstart) { 2383 offdiagIdx[r] = 0; 2384 } else offdiagIdx[r] = cend; 2385 } else { /* ncols > 0 */ 2386 offdiagIdx[r] = cmap[ncols-1] + 1; 2387 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2388 } 2389 } 2390 } 2391 2392 for (j=0; j<ncols; j++) { 2393 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2394 ba++; bj++; 2395 } 2396 } 2397 2398 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2399 ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2400 for (r = 0; r < m; ++r) { 2401 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2402 a[r] = diagA[r]; 2403 if (idx) idx[r] = cstart + diagIdx[r]; 2404 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2405 a[r] = diagA[r]; 2406 if (idx) { 2407 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2408 idx[r] = cstart + diagIdx[r]; 2409 } else idx[r] = offdiagIdx[r]; 2410 } 2411 } else { 2412 a[r] = offdiagA[r]; 2413 if (idx) idx[r] = offdiagIdx[r]; 2414 } 2415 } 2416 ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr); 2417 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2418 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2419 ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2420 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2421 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2422 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2423 PetscFunctionReturn(0); 2424 } 2425 2426 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2427 { 2428 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 2429 PetscInt m = A->rmap->n,n = A->cmap->n; 2430 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2431 PetscInt *cmap = mat->garray; 2432 PetscInt *diagIdx, *offdiagIdx; 2433 Vec diagV, offdiagV; 2434 PetscScalar *a, *diagA, *offdiagA; 2435 const PetscScalar *ba,*bav; 2436 PetscInt r,j,col,ncols,*bi,*bj; 2437 PetscErrorCode ierr; 2438 Mat B = mat->B; 2439 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2440 2441 PetscFunctionBegin; 2442 /* When a process holds entire A and other processes have no entry */ 2443 if (A->cmap->N == n) { 2444 ierr = 
VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2445 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2446 ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr); 2447 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2448 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2449 PetscFunctionReturn(0); 2450 } else if (n == 0) { 2451 if (m) { 2452 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2453 for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;} 2454 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2455 } 2456 PetscFunctionReturn(0); 2457 } 2458 2459 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2460 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2461 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2462 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2463 2464 /* Get offdiagIdx[] for implicit 0.0 */ 2465 ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr); 2466 ba = bav; 2467 bi = b->i; 2468 bj = b->j; 2469 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2470 for (r = 0; r < m; r++) { 2471 ncols = bi[r+1] - bi[r]; 2472 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2473 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2474 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2475 offdiagA[r] = 0.0; 2476 2477 /* Find first hole in the cmap */ 2478 for (j=0; j<ncols; j++) { 2479 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2480 if (col > j && j < cstart) { 2481 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2482 break; 2483 } else if (col > j + n && j >= cstart) { 2484 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2485 break; 2486 } 2487 } 2488 if (j == ncols && ncols < A->cmap->N - n) { 2489 /* a hole is outside compressed Bcols */ 2490 if (ncols == 0) { 2491 if (cstart) { 2492 offdiagIdx[r] = 0; 2493 } else offdiagIdx[r] = cend; 2494 } else { /* ncols > 0 */ 2495 offdiagIdx[r] = cmap[ncols-1] + 1; 2496 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2497 } 2498 } 2499 } 2500 2501 for (j=0; j<ncols; j++) { 2502 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2503 ba++; bj++; 2504 } 2505 } 2506 2507 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2508 ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr); 2509 for (r = 0; r < m; ++r) { 2510 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2511 a[r] = diagA[r]; 2512 if (idx) idx[r] = cstart + diagIdx[r]; 2513 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2514 a[r] = diagA[r]; 2515 if (idx) { 2516 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2517 idx[r] = cstart + diagIdx[r]; 2518 } else idx[r] = offdiagIdx[r]; 2519 } 2520 } else { 2521 a[r] = offdiagA[r]; 2522 if (idx) idx[r] = offdiagIdx[r]; 2523 } 2524 } 2525 ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr); 2526 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2527 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2528 ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr); 2529 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2530 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2531 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2532 PetscFunctionReturn(0); 2533 } 2534 2535 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2536 { 2537 PetscErrorCode ierr; 2538 Mat *dummy; 2539 2540 PetscFunctionBegin; 2541 ierr = 
MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2542 *newmat = *dummy; 2543 ierr = PetscFree(dummy);CHKERRQ(ierr); 2544 PetscFunctionReturn(0); 2545 } 2546 2547 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2548 { 2549 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2550 PetscErrorCode ierr; 2551 2552 PetscFunctionBegin; 2553 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2554 A->factorerrortype = a->A->factorerrortype; 2555 PetscFunctionReturn(0); 2556 } 2557 2558 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2559 { 2560 PetscErrorCode ierr; 2561 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2562 2563 PetscFunctionBegin; 2564 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2565 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2566 if (x->assembled) { 2567 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2568 } else { 2569 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2570 } 2571 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2572 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2573 PetscFunctionReturn(0); 2574 } 2575 2576 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2577 { 2578 PetscFunctionBegin; 2579 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2580 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2581 PetscFunctionReturn(0); 2582 } 2583 2584 /*@ 2585 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2586 2587 Collective on Mat 2588 2589 Input Parameters: 2590 + A - the matrix 2591 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2592 2593 Level: advanced 2594 2595 @*/ 2596 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2597 { 2598 PetscErrorCode ierr; 2599 2600 PetscFunctionBegin; 2601 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2602 PetscFunctionReturn(0); 2603 } 2604 2605 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2606 { 2607 PetscErrorCode ierr; 2608 PetscBool sc = PETSC_FALSE,flg; 2609 2610 PetscFunctionBegin; 2611 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2612 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2613 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2614 if (flg) { 2615 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2616 } 2617 ierr = PetscOptionsTail();CHKERRQ(ierr); 2618 PetscFunctionReturn(0); 2619 } 2620 2621 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2622 { 2623 PetscErrorCode ierr; 2624 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2625 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2626 2627 PetscFunctionBegin; 2628 if (!Y->preallocated) { 2629 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2630 } else if (!aij->nz) { 2631 PetscInt nonew = aij->nonew; 2632 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2633 aij->nonew = nonew; 2634 } 2635 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2636 
PetscFunctionReturn(0); 2637 } 2638 2639 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2640 { 2641 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2642 PetscErrorCode ierr; 2643 2644 PetscFunctionBegin; 2645 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2646 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2647 if (d) { 2648 PetscInt rstart; 2649 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2650 *d += rstart; 2651 2652 } 2653 PetscFunctionReturn(0); 2654 } 2655 2656 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2657 { 2658 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2659 PetscErrorCode ierr; 2660 2661 PetscFunctionBegin; 2662 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2663 PetscFunctionReturn(0); 2664 } 2665 2666 /* -------------------------------------------------------------------*/ 2667 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2668 MatGetRow_MPIAIJ, 2669 MatRestoreRow_MPIAIJ, 2670 MatMult_MPIAIJ, 2671 /* 4*/ MatMultAdd_MPIAIJ, 2672 MatMultTranspose_MPIAIJ, 2673 MatMultTransposeAdd_MPIAIJ, 2674 NULL, 2675 NULL, 2676 NULL, 2677 /*10*/ NULL, 2678 NULL, 2679 NULL, 2680 MatSOR_MPIAIJ, 2681 MatTranspose_MPIAIJ, 2682 /*15*/ MatGetInfo_MPIAIJ, 2683 MatEqual_MPIAIJ, 2684 MatGetDiagonal_MPIAIJ, 2685 MatDiagonalScale_MPIAIJ, 2686 MatNorm_MPIAIJ, 2687 /*20*/ MatAssemblyBegin_MPIAIJ, 2688 MatAssemblyEnd_MPIAIJ, 2689 MatSetOption_MPIAIJ, 2690 MatZeroEntries_MPIAIJ, 2691 /*24*/ MatZeroRows_MPIAIJ, 2692 NULL, 2693 NULL, 2694 NULL, 2695 NULL, 2696 /*29*/ MatSetUp_MPIAIJ, 2697 NULL, 2698 NULL, 2699 MatGetDiagonalBlock_MPIAIJ, 2700 NULL, 2701 /*34*/ MatDuplicate_MPIAIJ, 2702 NULL, 2703 NULL, 2704 NULL, 2705 NULL, 2706 /*39*/ MatAXPY_MPIAIJ, 2707 MatCreateSubMatrices_MPIAIJ, 2708 MatIncreaseOverlap_MPIAIJ, 2709 MatGetValues_MPIAIJ, 2710 MatCopy_MPIAIJ, 2711 /*44*/ MatGetRowMax_MPIAIJ, 2712 MatScale_MPIAIJ, 2713 MatShift_MPIAIJ, 2714 MatDiagonalSet_MPIAIJ, 2715 MatZeroRowsColumns_MPIAIJ, 2716 /*49*/ MatSetRandom_MPIAIJ, 2717 NULL, 2718 NULL, 2719 NULL, 2720 NULL, 2721 /*54*/ MatFDColoringCreate_MPIXAIJ, 2722 NULL, 2723 MatSetUnfactored_MPIAIJ, 2724 MatPermute_MPIAIJ, 2725 NULL, 2726 /*59*/ MatCreateSubMatrix_MPIAIJ, 2727 MatDestroy_MPIAIJ, 2728 MatView_MPIAIJ, 2729 NULL, 2730 NULL, 2731 /*64*/ NULL, 2732 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2733 NULL, 2734 NULL, 2735 NULL, 2736 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2737 MatGetRowMinAbs_MPIAIJ, 2738 NULL, 2739 NULL, 2740 NULL, 2741 NULL, 2742 /*75*/ MatFDColoringApply_AIJ, 2743 MatSetFromOptions_MPIAIJ, 2744 NULL, 2745 NULL, 2746 MatFindZeroDiagonals_MPIAIJ, 2747 /*80*/ NULL, 2748 NULL, 2749 NULL, 2750 /*83*/ MatLoad_MPIAIJ, 2751 MatIsSymmetric_MPIAIJ, 2752 NULL, 2753 NULL, 2754 NULL, 2755 NULL, 2756 /*89*/ NULL, 2757 NULL, 2758 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2759 NULL, 2760 NULL, 2761 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2762 NULL, 2763 NULL, 2764 NULL, 2765 MatBindToCPU_MPIAIJ, 2766 /*99*/ MatProductSetFromOptions_MPIAIJ, 2767 NULL, 2768 NULL, 2769 MatConjugate_MPIAIJ, 2770 NULL, 2771 /*104*/MatSetValuesRow_MPIAIJ, 2772 MatRealPart_MPIAIJ, 2773 MatImaginaryPart_MPIAIJ, 2774 NULL, 2775 NULL, 2776 /*109*/NULL, 2777 NULL, 2778 MatGetRowMin_MPIAIJ, 2779 NULL, 2780 MatMissingDiagonal_MPIAIJ, 2781 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2782 NULL, 2783 MatGetGhosts_MPIAIJ, 2784 NULL, 2785 NULL, 2786 /*119*/MatMultDiagonalBlock_MPIAIJ, 2787 
NULL, 2788 NULL, 2789 NULL, 2790 MatGetMultiProcBlock_MPIAIJ, 2791 /*124*/MatFindNonzeroRows_MPIAIJ, 2792 MatGetColumnNorms_MPIAIJ, 2793 MatInvertBlockDiagonal_MPIAIJ, 2794 MatInvertVariableBlockDiagonal_MPIAIJ, 2795 MatCreateSubMatricesMPI_MPIAIJ, 2796 /*129*/NULL, 2797 NULL, 2798 NULL, 2799 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2800 NULL, 2801 /*134*/NULL, 2802 NULL, 2803 NULL, 2804 NULL, 2805 NULL, 2806 /*139*/MatSetBlockSizes_MPIAIJ, 2807 NULL, 2808 NULL, 2809 MatFDColoringSetUp_MPIXAIJ, 2810 MatFindOffBlockDiagonalEntries_MPIAIJ, 2811 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2812 /*145*/NULL, 2813 NULL, 2814 NULL 2815 }; 2816 2817 /* ----------------------------------------------------------------------------------------*/ 2818 2819 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2820 { 2821 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2822 PetscErrorCode ierr; 2823 2824 PetscFunctionBegin; 2825 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2826 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2827 PetscFunctionReturn(0); 2828 } 2829 2830 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2831 { 2832 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2833 PetscErrorCode ierr; 2834 2835 PetscFunctionBegin; 2836 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2837 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2838 PetscFunctionReturn(0); 2839 } 2840 2841 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2842 { 2843 Mat_MPIAIJ *b; 2844 PetscErrorCode ierr; 2845 PetscMPIInt size; 2846 2847 PetscFunctionBegin; 2848 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2849 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2850 b = (Mat_MPIAIJ*)B->data; 2851 2852 #if defined(PETSC_USE_CTABLE) 2853 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2854 #else 2855 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2856 #endif 2857 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2858 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2859 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2860 2861 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2862 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 2863 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2864 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2865 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 2866 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2867 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2868 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2869 2870 if (!B->preallocated) { 2871 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2872 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2873 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2874 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2875 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2876 } 2877 2878 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2879 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2880 B->preallocated = PETSC_TRUE; 2881 B->was_assembled = PETSC_FALSE; 2882 B->assembled = PETSC_FALSE; 2883 PetscFunctionReturn(0); 2884 } 2885 2886 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2887 { 2888 Mat_MPIAIJ *b; 2889 PetscErrorCode ierr; 2890 2891 PetscFunctionBegin; 2892 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2893 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2894 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2895 b = (Mat_MPIAIJ*)B->data; 2896 2897 #if defined(PETSC_USE_CTABLE) 2898 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2899 #else 2900 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2901 #endif 2902 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2903 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2904 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2905 2906 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2907 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2908 B->preallocated = PETSC_TRUE; 2909 B->was_assembled = PETSC_FALSE; 2910 B->assembled = PETSC_FALSE; 2911 PetscFunctionReturn(0); 2912 } 2913 2914 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2915 { 2916 Mat mat; 2917 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2918 PetscErrorCode ierr; 2919 2920 PetscFunctionBegin; 2921 *newmat = NULL; 2922 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2923 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2924 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2925 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2926 a = (Mat_MPIAIJ*)mat->data; 2927 2928 mat->factortype = matin->factortype; 2929 mat->assembled = matin->assembled; 2930 mat->insertmode = NOT_SET_VALUES; 2931 mat->preallocated = matin->preallocated; 2932 2933 a->size = oldmat->size; 2934 a->rank = oldmat->rank; 2935 a->donotstash = oldmat->donotstash; 2936 a->roworiented = oldmat->roworiented; 2937 a->rowindices = NULL; 2938 a->rowvalues = NULL; 2939 a->getrowactive = PETSC_FALSE; 2940 2941 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2942 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2943 2944 if (oldmat->colmap) { 2945 #if defined(PETSC_USE_CTABLE) 2946 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2947 #else 2948 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2949 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2950 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 2951 #endif 2952 } else a->colmap = NULL; 2953 if (oldmat->garray) { 2954 PetscInt len; 2955 len = oldmat->B->cmap->n; 2956 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2957 
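      /* garray maps the compressed (local) column numbering of the off-diagonal block B to global
         column numbers; the duplicate gets its own copy below so the two matrices can evolve
         independently */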
ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2958 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 2959 } else a->garray = NULL; 2960 2961 /* It may happen MatDuplicate is called with a non-assembled matrix 2962 In fact, MatDuplicate only requires the matrix to be preallocated 2963 This may happen inside a DMCreateMatrix_Shell */ 2964 if (oldmat->lvec) { 2965 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2966 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2967 } 2968 if (oldmat->Mvctx) { 2969 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2970 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2971 } 2972 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2973 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2974 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2975 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2976 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2977 *newmat = mat; 2978 PetscFunctionReturn(0); 2979 } 2980 2981 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2982 { 2983 PetscBool isbinary, ishdf5; 2984 PetscErrorCode ierr; 2985 2986 PetscFunctionBegin; 2987 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2988 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2989 /* force binary viewer to load .info file if it has not yet done so */ 2990 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2991 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2992 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2993 if (isbinary) { 2994 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 2995 } else if (ishdf5) { 2996 #if defined(PETSC_HAVE_HDF5) 2997 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 2998 #else 2999 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3000 #endif 3001 } else { 3002 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 3003 } 3004 PetscFunctionReturn(0); 3005 } 3006 3007 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3008 { 3009 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 3010 PetscInt *rowidxs,*colidxs; 3011 PetscScalar *matvals; 3012 PetscErrorCode ierr; 3013 3014 PetscFunctionBegin; 3015 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3016 3017 /* read in matrix header */ 3018 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3019 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3020 M = header[1]; N = header[2]; nz = header[3]; 3021 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 3022 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N); 3023 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 3024 3025 /* set block sizes from the viewer's .info file */ 3026 ierr = 
MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3027 /* set global sizes if not set already */ 3028 if (mat->rmap->N < 0) mat->rmap->N = M; 3029 if (mat->cmap->N < 0) mat->cmap->N = N; 3030 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3031 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3032 3033 /* check if the matrix sizes are correct */ 3034 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3035 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 3036 3037 /* read in row lengths and build row indices */ 3038 ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr); 3039 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3040 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 3041 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3042 ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr); 3043 if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 3044 /* read in column indices and matrix values */ 3045 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 3046 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 3047 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 3048 /* store matrix indices and values */ 3049 ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr); 3050 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3051 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3052 PetscFunctionReturn(0); 3053 } 3054 3055 /* Not scalable because of ISAllGather() unless getting all columns. 
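   (ISAllGather() leaves a copy of the entire column index set on every process, so the
   per-process memory use grows with the global size of iscol)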
*/ 3056 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3057 { 3058 PetscErrorCode ierr; 3059 IS iscol_local; 3060 PetscBool isstride; 3061 PetscMPIInt lisstride=0,gisstride; 3062 3063 PetscFunctionBegin; 3064 /* check if we are grabbing all columns*/ 3065 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3066 3067 if (isstride) { 3068 PetscInt start,len,mstart,mlen; 3069 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3070 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3071 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3072 if (mstart == start && mlen-mstart == len) lisstride = 1; 3073 } 3074 3075 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3076 if (gisstride) { 3077 PetscInt N; 3078 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3079 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr); 3080 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3081 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3082 } else { 3083 PetscInt cbs; 3084 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3085 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3086 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3087 } 3088 3089 *isseq = iscol_local; 3090 PetscFunctionReturn(0); 3091 } 3092 3093 /* 3094 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3095 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3096 3097 Input Parameters: 3098 mat - matrix 3099 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3100 i.e., mat->rstart <= isrow[i] < mat->rend 3101 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3102 i.e., mat->cstart <= iscol[i] < mat->cend 3103 Output Parameter: 3104 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3105 iscol_o - sequential column index set for retrieving mat->B 3106 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3107 */ 3108 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3109 { 3110 PetscErrorCode ierr; 3111 Vec x,cmap; 3112 const PetscInt *is_idx; 3113 PetscScalar *xarray,*cmaparray; 3114 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3115 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3116 Mat B=a->B; 3117 Vec lvec=a->lvec,lcmap; 3118 PetscInt i,cstart,cend,Bn=B->cmap->N; 3119 MPI_Comm comm; 3120 VecScatter Mvctx=a->Mvctx; 3121 3122 PetscFunctionBegin; 3123 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3124 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3125 3126 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3127 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3128 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3129 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3130 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3131 3132 /* Get start indices */ 3133 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3134 isstart -= ncols; 3135 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3136 3137 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3138 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3139 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3140 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3141 for (i=0; i<ncols; i++) { 3142 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3143 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3144 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3145 } 3146 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3147 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3148 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3149 3150 /* Get iscol_d */ 3151 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3152 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3153 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3154 3155 /* Get isrow_d */ 3156 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3157 rstart = mat->rmap->rstart; 3158 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3159 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3160 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3161 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3162 3163 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3164 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3165 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3166 3167 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3168 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3169 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3170 3171 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3172 3173 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3174 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3175 3176 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3177 /* off-process column indices */ 3178 count = 0; 3179 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3180 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3181 3182 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3183 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3184 for (i=0; i<Bn; i++) { 3185 if (PetscRealPart(xarray[i]) > -1.0) { 3186 idx[count] = i; /* local column index in off-diagonal part B */ 3187 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3188 count++; 3189 } 3190 } 3191 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3192 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3193 3194 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3195 /* cannot ensure iscol_o has same blocksize as iscol! 
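      (iscol_o keeps only the off-process columns selected on this rank, so it generally does not
      inherit the block structure of iscol)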
*/ 3196 3197 ierr = PetscFree(idx);CHKERRQ(ierr); 3198 *garray = cmap1; 3199 3200 ierr = VecDestroy(&x);CHKERRQ(ierr); 3201 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3202 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3203 PetscFunctionReturn(0); 3204 } 3205 3206 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3207 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3208 { 3209 PetscErrorCode ierr; 3210 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3211 Mat M = NULL; 3212 MPI_Comm comm; 3213 IS iscol_d,isrow_d,iscol_o; 3214 Mat Asub = NULL,Bsub = NULL; 3215 PetscInt n; 3216 3217 PetscFunctionBegin; 3218 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3219 3220 if (call == MAT_REUSE_MATRIX) { 3221 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3222 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3223 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3224 3225 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3226 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3227 3228 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3229 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3230 3231 /* Update diagonal and off-diagonal portions of submat */ 3232 asub = (Mat_MPIAIJ*)(*submat)->data; 3233 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3234 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3235 if (n) { 3236 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3237 } 3238 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3239 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3240 3241 } else { /* call == MAT_INITIAL_MATRIX) */ 3242 const PetscInt *garray; 3243 PetscInt BsubN; 3244 3245 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3246 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3247 3248 /* Create local submatrices Asub and Bsub */ 3249 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3250 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3251 3252 /* Create submatrix M */ 3253 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3254 3255 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3256 asub = (Mat_MPIAIJ*)M->data; 3257 3258 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3259 n = asub->B->cmap->N; 3260 if (BsubN > n) { 3261 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3262 const PetscInt *idx; 3263 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3264 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3265 3266 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3267 j = 0; 3268 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3269 for (i=0; i<n; i++) { 3270 if (j >= BsubN) break; 3271 while (subgarray[i] > garray[j]) j++; 3272 3273 if (subgarray[i] == garray[j]) { 3274 idx_new[i] = idx[j++]; 3275 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3276 } 3277 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3278 3279 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3280 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3281 3282 } else if (BsubN < n) { 3283 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3284 } 3285 3286 ierr = PetscFree(garray);CHKERRQ(ierr); 3287 *submat = M; 3288 3289 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3290 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3291 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3292 3293 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3294 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3295 3296 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3297 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3298 } 3299 PetscFunctionReturn(0); 3300 } 3301 3302 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3303 { 3304 PetscErrorCode ierr; 3305 IS iscol_local=NULL,isrow_d; 3306 PetscInt csize; 3307 PetscInt n,i,j,start,end; 3308 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3309 MPI_Comm comm; 3310 3311 PetscFunctionBegin; 3312 /* If isrow has same processor distribution as mat, 3313 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3314 if (call == MAT_REUSE_MATRIX) { 3315 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3316 if (isrow_d) { 3317 sameRowDist = PETSC_TRUE; 3318 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3319 } else { 3320 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3321 if (iscol_local) { 3322 sameRowDist = PETSC_TRUE; 3323 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3324 } 3325 } 3326 } else { 3327 /* Check if isrow has same processor distribution as mat */ 3328 sameDist[0] = 
PETSC_FALSE; 3329 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3330 if (!n) { 3331 sameDist[0] = PETSC_TRUE; 3332 } else { 3333 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3334 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3335 if (i >= start && j < end) { 3336 sameDist[0] = PETSC_TRUE; 3337 } 3338 } 3339 3340 /* Check if iscol has same processor distribution as mat */ 3341 sameDist[1] = PETSC_FALSE; 3342 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3343 if (!n) { 3344 sameDist[1] = PETSC_TRUE; 3345 } else { 3346 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3347 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3348 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3349 } 3350 3351 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3352 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3353 sameRowDist = tsameDist[0]; 3354 } 3355 3356 if (sameRowDist) { 3357 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3358 /* isrow and iscol have same processor distribution as mat */ 3359 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3360 PetscFunctionReturn(0); 3361 } else { /* sameRowDist */ 3362 /* isrow has same processor distribution as mat */ 3363 if (call == MAT_INITIAL_MATRIX) { 3364 PetscBool sorted; 3365 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3366 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3367 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3368 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3369 3370 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3371 if (sorted) { 3372 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3373 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3374 PetscFunctionReturn(0); 3375 } 3376 } else { /* call == MAT_REUSE_MATRIX */ 3377 IS iscol_sub; 3378 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3379 if (iscol_sub) { 3380 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3381 PetscFunctionReturn(0); 3382 } 3383 } 3384 } 3385 } 3386 3387 /* General case: iscol -> iscol_local which has global size of iscol */ 3388 if (call == MAT_REUSE_MATRIX) { 3389 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3390 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3391 } else { 3392 if (!iscol_local) { 3393 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3394 } 3395 } 3396 3397 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3398 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3399 3400 if (call == MAT_INITIAL_MATRIX) { 3401 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3402 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3403 } 3404 PetscFunctionReturn(0); 3405 } 3406 3407 /*@C 3408 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3409 and "off-diagonal" part of the matrix in CSR format. 3410 3411 Collective 3412 3413 Input Parameters: 3414 + comm - MPI communicator 3415 . 
A - "diagonal" portion of matrix 3416 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3417 - garray - global index of B columns 3418 3419 Output Parameter: 3420 . mat - the matrix, with input A as its local diagonal matrix 3421 Level: advanced 3422 3423 Notes: 3424 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3425 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3426 3427 .seealso: MatCreateMPIAIJWithSplitArrays() 3428 @*/ 3429 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3430 { 3431 PetscErrorCode ierr; 3432 Mat_MPIAIJ *maij; 3433 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3434 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3435 const PetscScalar *oa; 3436 Mat Bnew; 3437 PetscInt m,n,N; 3438 3439 PetscFunctionBegin; 3440 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3441 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3442 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3443 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3444 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3445 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3446 3447 /* Get global columns of mat */ 3448 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3449 3450 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3451 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3452 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3453 maij = (Mat_MPIAIJ*)(*mat)->data; 3454 3455 (*mat)->preallocated = PETSC_TRUE; 3456 3457 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3458 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3459 3460 /* Set A as diagonal portion of *mat */ 3461 maij->A = A; 3462 3463 nz = oi[m]; 3464 for (i=0; i<nz; i++) { 3465 col = oj[i]; 3466 oj[i] = garray[col]; 3467 } 3468 3469 /* Set Bnew as off-diagonal portion of *mat */ 3470 ierr = MatSeqAIJGetArrayRead(B,&oa);CHKERRQ(ierr); 3471 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew);CHKERRQ(ierr); 3472 ierr = MatSeqAIJRestoreArrayRead(B,&oa);CHKERRQ(ierr); 3473 bnew = (Mat_SeqAIJ*)Bnew->data; 3474 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3475 maij->B = Bnew; 3476 3477 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3478 3479 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3480 b->free_a = PETSC_FALSE; 3481 b->free_ij = PETSC_FALSE; 3482 ierr = MatDestroy(&B);CHKERRQ(ierr); 3483 3484 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3485 bnew->free_a = PETSC_TRUE; 3486 bnew->free_ij = PETSC_TRUE; 3487 3488 /* condense columns of maij->B */ 3489 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3490 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3491 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3492 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3493 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3494 PetscFunctionReturn(0); 3495 } 3496 3497 
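/*
   Usage sketch for MatCreateMPIAIJWithSeqAIJ() (illustration only, not part of the library;
   it assumes a run on exactly two MPI processes, and the sizes, values and garray entries are
   made up).  Each rank builds a 2x2 SeqAIJ diagonal block Ad and a 2x1 SeqAIJ off-diagonal
   block Ao; garray maps the single local column of Ao to the global column it couples to on
   the other rank.  Ad and Ao are consumed by the call and must not be used afterwards.

      Mat            Ad,Ao,C;
      PetscInt       garray[1];
      PetscMPIInt    rank;
      PetscErrorCode ierr;

      ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRMPI(ierr);
      ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,2,2,2,NULL,&Ad);CHKERRQ(ierr);
      ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,2,1,1,NULL,&Ao);CHKERRQ(ierr);
      ierr = MatSetValue(Ad,0,0,1.0,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatSetValue(Ad,1,1,1.0,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatSetValue(Ao,0,0,2.0,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(Ad,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(Ad,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(Ao,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(Ao,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      garray[0] = rank ? 0 : 2;
      ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Ad,Ao,garray,&C);CHKERRQ(ierr);
*/
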
extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3498 3499 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3500 { 3501 PetscErrorCode ierr; 3502 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3503 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3504 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3505 Mat M,Msub,B=a->B; 3506 MatScalar *aa; 3507 Mat_SeqAIJ *aij; 3508 PetscInt *garray = a->garray,*colsub,Ncols; 3509 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3510 IS iscol_sub,iscmap; 3511 const PetscInt *is_idx,*cmap; 3512 PetscBool allcolumns=PETSC_FALSE; 3513 MPI_Comm comm; 3514 3515 PetscFunctionBegin; 3516 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3517 if (call == MAT_REUSE_MATRIX) { 3518 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3519 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3520 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3521 3522 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3523 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3524 3525 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3526 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3527 3528 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3529 3530 } else { /* call == MAT_INITIAL_MATRIX) */ 3531 PetscBool flg; 3532 3533 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3534 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3535 3536 /* (1) iscol -> nonscalable iscol_local */ 3537 /* Check for special case: each processor gets entire matrix columns */ 3538 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3539 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3540 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3541 if (allcolumns) { 3542 iscol_sub = iscol_local; 3543 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3544 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3545 3546 } else { 3547 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3548 PetscInt *idx,*cmap1,k; 3549 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3550 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3551 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3552 count = 0; 3553 k = 0; 3554 for (i=0; i<Ncols; i++) { 3555 j = is_idx[i]; 3556 if (j >= cstart && j < cend) { 3557 /* diagonal part of mat */ 3558 idx[count] = j; 3559 cmap1[count++] = i; /* column index in submat */ 3560 } else if (Bn) { 3561 /* off-diagonal part of mat */ 3562 if (j == garray[k]) { 3563 idx[count] = j; 3564 cmap1[count++] = i; /* column index in submat */ 3565 } else if (j > garray[k]) { 3566 while (j > garray[k] && k < Bn-1) k++; 3567 if (j == garray[k]) { 3568 idx[count] = j; 3569 cmap1[count++] = i; /* column index in submat */ 3570 } 3571 } 3572 } 3573 } 3574 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3575 3576 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3577 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3578 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3579 3580 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3581 } 3582 3583 /* (3) Create sequential Msub */ 3584 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3585 } 3586 3587 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3588 aij = (Mat_SeqAIJ*)(Msub)->data; 3589 ii = aij->i; 3590 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3591 3592 /* 3593 m - number of local rows 3594 Ncols - number of columns (same on all processors) 3595 rstart - first row in new global matrix generated 3596 */ 3597 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3598 3599 if (call == MAT_INITIAL_MATRIX) { 3600 /* (4) Create parallel newmat */ 3601 PetscMPIInt rank,size; 3602 PetscInt csize; 3603 3604 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 3605 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 3606 3607 /* 3608 Determine the number of non-zeros in the diagonal and off-diagonal 3609 portions of the matrix in order to do correct preallocation 3610 */ 3611 3612 /* first get start and end of "diagonal" columns */ 3613 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3614 if (csize == PETSC_DECIDE) { 3615 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3616 if (mglobal == Ncols) { /* square matrix */ 3617 nlocal = m; 3618 } else { 3619 nlocal = Ncols/size + ((Ncols % size) > rank); 3620 } 3621 } else { 3622 nlocal = csize; 3623 } 3624 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3625 rstart = rend - nlocal; 3626 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3627 3628 /* next, compute all the lengths */ 3629 jj = aij->j; 3630 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3631 olens = dlens + m; 3632 for (i=0; i<m; i++) { 3633 jend = ii[i+1] - ii[i]; 3634 olen = 0; 3635 dlen = 0; 3636 for (j=0; j<jend; j++) { 3637 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3638 else dlen++; 3639 jj++; 3640 } 3641 olens[i] = olen; 3642 dlens[i] = dlen; 3643 } 3644 3645 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3646 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3647 3648 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3649 ierr = 
MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 3650 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3651 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3652 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3653 ierr = PetscFree(dlens);CHKERRQ(ierr); 3654 3655 } else { /* call == MAT_REUSE_MATRIX */ 3656 M = *newmat; 3657 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3658 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3659 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3660 /* 3661 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3662 rather than the slower MatSetValues(). 3663 */ 3664 M->was_assembled = PETSC_TRUE; 3665 M->assembled = PETSC_FALSE; 3666 } 3667 3668 /* (5) Set values of Msub to *newmat */ 3669 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3670 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3671 3672 jj = aij->j; 3673 ierr = MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr); 3674 for (i=0; i<m; i++) { 3675 row = rstart + i; 3676 nz = ii[i+1] - ii[i]; 3677 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3678 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3679 jj += nz; aa += nz; 3680 } 3681 ierr = MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr); 3682 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3683 3684 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3685 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3686 3687 ierr = PetscFree(colsub);CHKERRQ(ierr); 3688 3689 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3690 if (call == MAT_INITIAL_MATRIX) { 3691 *newmat = M; 3692 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3693 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3694 3695 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3696 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3697 3698 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3699 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3700 3701 if (iscol_local) { 3702 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3703 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3704 } 3705 } 3706 PetscFunctionReturn(0); 3707 } 3708 3709 /* 3710 Not great since it makes two copies of the submatrix, first an SeqAIJ 3711 in local and then by concatenating the local matrices the end result. 3712 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3713 3714 Note: This requires a sequential iscol with all indices. 
3715 */ 3716 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3717 { 3718 PetscErrorCode ierr; 3719 PetscMPIInt rank,size; 3720 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3721 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3722 Mat M,Mreuse; 3723 MatScalar *aa,*vwork; 3724 MPI_Comm comm; 3725 Mat_SeqAIJ *aij; 3726 PetscBool colflag,allcolumns=PETSC_FALSE; 3727 3728 PetscFunctionBegin; 3729 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3730 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 3731 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 3732 3733 /* Check for special case: each processor gets entire matrix columns */ 3734 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3735 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3736 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3737 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3738 3739 if (call == MAT_REUSE_MATRIX) { 3740 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3741 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3742 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3743 } else { 3744 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3745 } 3746 3747 /* 3748 m - number of local rows 3749 n - number of columns (same on all processors) 3750 rstart - first row in new global matrix generated 3751 */ 3752 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3753 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3754 if (call == MAT_INITIAL_MATRIX) { 3755 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3756 ii = aij->i; 3757 jj = aij->j; 3758 3759 /* 3760 Determine the number of non-zeros in the diagonal and off-diagonal 3761 portions of the matrix in order to do correct preallocation 3762 */ 3763 3764 /* first get start and end of "diagonal" columns */ 3765 if (csize == PETSC_DECIDE) { 3766 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3767 if (mglobal == n) { /* square matrix */ 3768 nlocal = m; 3769 } else { 3770 nlocal = n/size + ((n % size) > rank); 3771 } 3772 } else { 3773 nlocal = csize; 3774 } 3775 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3776 rstart = rend - nlocal; 3777 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3778 3779 /* next, compute all the lengths */ 3780 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3781 olens = dlens + m; 3782 for (i=0; i<m; i++) { 3783 jend = ii[i+1] - ii[i]; 3784 olen = 0; 3785 dlen = 0; 3786 for (j=0; j<jend; j++) { 3787 if (*jj < rstart || *jj >= rend) olen++; 3788 else dlen++; 3789 jj++; 3790 } 3791 olens[i] = olen; 3792 dlens[i] = dlen; 3793 } 3794 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3795 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3796 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3797 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3798 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3799 ierr = PetscFree(dlens);CHKERRQ(ierr); 3800 } else { 3801 PetscInt ml,nl; 3802 3803 M = *newmat; 3804 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3805 
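    /* when reusing, the caller-provided matrix must keep the local row layout of the initial call */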
if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3806 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3807 /* 3808 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3809 rather than the slower MatSetValues(). 3810 */ 3811 M->was_assembled = PETSC_TRUE; 3812 M->assembled = PETSC_FALSE; 3813 } 3814 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3815 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3816 ii = aij->i; 3817 jj = aij->j; 3818 3819 /* trigger copy to CPU if needed */ 3820 ierr = MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr); 3821 for (i=0; i<m; i++) { 3822 row = rstart + i; 3823 nz = ii[i+1] - ii[i]; 3824 cwork = jj; jj += nz; 3825 vwork = aa; aa += nz; 3826 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3827 } 3828 ierr = MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr); 3829 3830 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3831 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3832 *newmat = M; 3833 3834 /* save submatrix used in processor for next request */ 3835 if (call == MAT_INITIAL_MATRIX) { 3836 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3837 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3838 } 3839 PetscFunctionReturn(0); 3840 } 3841 3842 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3843 { 3844 PetscInt m,cstart, cend,j,nnz,i,d; 3845 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3846 const PetscInt *JJ; 3847 PetscErrorCode ierr; 3848 PetscBool nooffprocentries; 3849 3850 PetscFunctionBegin; 3851 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3852 3853 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3854 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3855 m = B->rmap->n; 3856 cstart = B->cmap->rstart; 3857 cend = B->cmap->rend; 3858 rstart = B->rmap->rstart; 3859 3860 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3861 3862 if (PetscDefined(USE_DEBUG)) { 3863 for (i=0; i<m; i++) { 3864 nnz = Ii[i+1]- Ii[i]; 3865 JJ = J + Ii[i]; 3866 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3867 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3868 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3869 } 3870 } 3871 3872 for (i=0; i<m; i++) { 3873 nnz = Ii[i+1]- Ii[i]; 3874 JJ = J + Ii[i]; 3875 nnz_max = PetscMax(nnz_max,nnz); 3876 d = 0; 3877 for (j=0; j<nnz; j++) { 3878 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3879 } 3880 d_nnz[i] = d; 3881 o_nnz[i] = nnz - d; 3882 } 3883 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3884 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3885 3886 for (i=0; i<m; i++) { 3887 ii = i + rstart; 3888 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? 
v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 3889 } 3890 nooffprocentries = B->nooffprocentries; 3891 B->nooffprocentries = PETSC_TRUE; 3892 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3893 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3894 B->nooffprocentries = nooffprocentries; 3895 3896 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3897 PetscFunctionReturn(0); 3898 } 3899 3900 /*@ 3901 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3902 (the default parallel PETSc format). 3903 3904 Collective 3905 3906 Input Parameters: 3907 + B - the matrix 3908 . i - the indices into j for the start of each local row (starts with zero) 3909 . j - the column indices for each local row (starts with zero) 3910 - v - optional values in the matrix 3911 3912 Level: developer 3913 3914 Notes: 3915 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3916 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3917 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3918 3919 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3920 3921 The format which is used for the sparse matrix input, is equivalent to a 3922 row-major ordering.. i.e for the following matrix, the input data expected is 3923 as shown 3924 3925 $ 1 0 0 3926 $ 2 0 3 P0 3927 $ ------- 3928 $ 4 5 6 P1 3929 $ 3930 $ Process0 [P0]: rows_owned=[0,1] 3931 $ i = {0,1,3} [size = nrow+1 = 2+1] 3932 $ j = {0,0,2} [size = 3] 3933 $ v = {1,2,3} [size = 3] 3934 $ 3935 $ Process1 [P1]: rows_owned=[2] 3936 $ i = {0,3} [size = nrow+1 = 1+1] 3937 $ j = {0,1,2} [size = 3] 3938 $ v = {4,5,6} [size = 3] 3939 3940 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3941 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3942 @*/ 3943 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3944 { 3945 PetscErrorCode ierr; 3946 3947 PetscFunctionBegin; 3948 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3949 PetscFunctionReturn(0); 3950 } 3951 3952 /*@C 3953 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3954 (the default parallel PETSc format). For good matrix assembly performance 3955 the user should preallocate the matrix storage by setting the parameters 3956 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3957 performance can be increased by more than a factor of 50. 3958 3959 Collective 3960 3961 Input Parameters: 3962 + B - the matrix 3963 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3964 (same value is used for all local rows) 3965 . d_nnz - array containing the number of nonzeros in the various rows of the 3966 DIAGONAL portion of the local submatrix (possibly different for each row) 3967 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3968 The size of this array is equal to the number of local rows, i.e 'm'. 3969 For matrices that will be factored, you must leave room for (and set) 3970 the diagonal entry even if it is zero. 3971 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3972 submatrix (same value is used for all local rows). 3973 - o_nnz - array containing the number of nonzeros in the various rows of the 3974 OFF-DIAGONAL portion of the local submatrix (possibly different for 3975 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3976 structure. The size of this array is equal to the number 3977 of local rows, i.e 'm'. 3978 3979 If the *_nnz parameter is given then the *_nz parameter is ignored 3980 3981 The AIJ format (also called the Yale sparse matrix format or 3982 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3983 storage. The stored row and column indices begin with zero. 3984 See Users-Manual: ch_mat for details. 3985 3986 The parallel matrix is partitioned such that the first m0 rows belong to 3987 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3988 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3989 3990 The DIAGONAL portion of the local submatrix of a processor can be defined 3991 as the submatrix which is obtained by extraction the part corresponding to 3992 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3993 first row that belongs to the processor, r2 is the last row belonging to 3994 the this processor, and c1-c2 is range of indices of the local part of a 3995 vector suitable for applying the matrix to. This is an mxn matrix. In the 3996 common case of a square matrix, the row and column ranges are the same and 3997 the DIAGONAL part is also square. The remaining portion of the local 3998 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3999 4000 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4001 4002 You can call MatGetInfo() to get information on how effective the preallocation was; 4003 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4004 You can also run with the option -info and look for messages with the string 4005 malloc in them to see if additional memory allocation was needed. 4006 4007 Example usage: 4008 4009 Consider the following 8x8 matrix with 34 non-zero values, that is 4010 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4011 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4012 as follows: 4013 4014 .vb 4015 1 2 0 | 0 3 0 | 0 4 4016 Proc0 0 5 6 | 7 0 0 | 8 0 4017 9 0 10 | 11 0 0 | 12 0 4018 ------------------------------------- 4019 13 0 14 | 15 16 17 | 0 0 4020 Proc1 0 18 0 | 19 20 21 | 0 0 4021 0 0 0 | 22 23 0 | 24 0 4022 ------------------------------------- 4023 Proc2 25 26 27 | 0 0 28 | 29 0 4024 30 0 0 | 31 32 33 | 0 34 4025 .ve 4026 4027 This can be represented as a collection of submatrices as: 4028 4029 .vb 4030 A B C 4031 D E F 4032 G H I 4033 .ve 4034 4035 Where the submatrices A,B,C are owned by proc0, D,E,F are 4036 owned by proc1, G,H,I are owned by proc2. 4037 4038 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4039 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4040 The 'M','N' parameters are 8,8, and have the same values on all procs. 4041 4042 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4043 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4044 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4045 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4046 part as SeqAIJ matrices. 
For example, proc1 will store [E] as a SeqAIJ
4047   matrix, and [DF] as another SeqAIJ matrix.
4048
4049   When d_nz, o_nz parameters are specified, d_nz storage elements are
4050   allocated for every row of the local diagonal submatrix, and o_nz
4051   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4052   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over the local
4053   rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4054   In this case, the values of d_nz,o_nz are:
4055 .vb
4056      proc0 : d_nz = 2, o_nz = 2
4057      proc1 : d_nz = 3, o_nz = 2
4058      proc2 : d_nz = 1, o_nz = 4
4059 .ve
4060   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4061   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4062   for proc2, i.e. we are using 12+15+10=37 storage locations to store
4063   34 values.
4064
4065   When d_nnz, o_nnz parameters are specified, the storage is specified
4066   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4067   In the above case the values for d_nnz,o_nnz are:
4068 .vb
4069      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4070      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4071      proc2: d_nnz = [1,1] and o_nnz = [4,4]
4072 .ve
4073   Here the space allocated is the sum of all the above values, i.e. 34, and
4074   hence pre-allocation is perfect.
4075
4076   Level: intermediate
4077
4078 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4079           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4080 @*/
4081 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4082 {
4083   PetscErrorCode ierr;
4084
4085   PetscFunctionBegin;
4086   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4087   PetscValidType(B,1);
4088   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4089   PetscFunctionReturn(0);
4090 }
4091
4092 /*@
4093   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
4094   rows in standard CSR format.
4095
4096   Collective
4097
4098   Input Parameters:
4099 + comm - MPI communicator
4100 . m - number of local rows (Cannot be PETSC_DECIDE)
4101 . n - This value should be the same as the local size used in creating the
4102       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4103       calculated if N is given) For square matrices n is almost always m.
4104 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4105 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4106 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4107 . j - column indices
4108 - a - matrix values
4109
4110   Output Parameter:
4111 . mat - the matrix
4112
4113   Level: intermediate
4114
4115   Notes:
4116   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4117   thus you CANNOT change the matrix entries by changing the values of a[] after you have
4118   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4119
4120   The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4121
4122   The format which is used for the sparse matrix input, is equivalent to a
4123   row-major ordering; i.e., for the following matrix, the input data expected is
4124   as shown below.
4125
4126   Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4127
4128 $        1 0 0
4129 $        2 0 3     P0
4130 $        -------
4131 $        4 5 6     P1
4132 $
4133 $     Process0 [P0]: rows_owned=[0,1]
4134 $        i = {0,1,3}  [size = nrow+1 = 2+1]
4135 $        j = {0,0,2}  [size = 3]
4136 $        v = {1,2,3}  [size = 3]
4137 $
4138 $     Process1 [P1]: rows_owned=[2]
4139 $        i = {0,3}    [size = nrow+1 = 1+1]
4140 $        j = {0,1,2}  [size = 3]
4141 $        v = {4,5,6}  [size = 3]
4142
4143 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4144           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4145 @*/
4146 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4147 {
4148   PetscErrorCode ierr;
4149
4150   PetscFunctionBegin;
4151   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4152   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4153   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4154   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4155   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4156   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4157   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4158   PetscFunctionReturn(0);
4159 }
4160
4161 /*@
4162   MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local
4163   rows in standard CSR format. Only the numerical values are updated; the other arrays must be identical.
4164
4165   Collective
4166
4167   Input Parameters:
4168 + mat - the matrix
4169 . m - number of local rows (Cannot be PETSC_DECIDE)
4170 . n - This value should be the same as the local size used in creating the
4171       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4172       calculated if N is given) For square matrices n is almost always m.
4173 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4174 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4175 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4176 . J - column indices
4177 - v - matrix values
4178
4179   Level: intermediate
4180
4181 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4182           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4183 @*/
4184 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4185 {
4186   PetscErrorCode ierr;
4187   PetscInt       cstart,nnz,i,j;
4188   PetscInt       *ld;
4189   PetscBool      nooffprocentries;
4190   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4191   Mat_SeqAIJ     *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data;
4192   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4193   const PetscInt *Adi = Ad->i;
4194   PetscInt       ldi,Iii,md;
4195
4196   PetscFunctionBegin;
4197   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4198   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4199   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4200   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4201
4202   cstart = mat->cmap->rstart;
4203   if (!Aij->ld) {
4204     /* count number of entries below block diagonal */
4205     ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4206     Aij->ld = ld;
4207     for (i=0; i<m; i++) {
4208       nnz = Ii[i+1]- Ii[i];
4209       j = 0;
4210       while (j < nnz && J[j] < cstart) {j++;}
4211       J += nnz;
4212       ld[i] = j;
4213     }
4214   } else {
4215     ld = Aij->ld;
4216   }
4217
4218   for (i=0; i<m; i++) {
4219     nnz = Ii[i+1]- Ii[i];
4220     Iii = Ii[i];
4221     ldi = ld[i];
4222     md  = Adi[i+1]-Adi[i];
4223     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4224     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4225     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4226     ad += md;
4227     ao += nnz - md;
4228   }
4229   nooffprocentries      = mat->nooffprocentries;
4230   mat->nooffprocentries = PETSC_TRUE;
4231   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4232   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4233   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4234   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4235   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4236   mat->nooffprocentries = nooffprocentries;
4237   PetscFunctionReturn(0);
4238 }
4239
4240 /*@C
4241   MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4242   (the default parallel PETSc format). For good matrix assembly performance
4243   the user should preallocate the matrix storage by setting the parameters
4244   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
4245   performance can be increased by more than a factor of 50.
4246
4247   Collective
4248
4249   Input Parameters:
4250 + comm - MPI communicator
4251 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4252       This value should be the same as the local size used in creating the
4253       y vector for the matrix-vector product y = Ax.
4254 . n - This value should be the same as the local size used in creating the
4255       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4256       calculated if N is given) For square matrices n is almost always m.
4257 .
M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4258 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4259 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4260 (same value is used for all local rows) 4261 . d_nnz - array containing the number of nonzeros in the various rows of the 4262 DIAGONAL portion of the local submatrix (possibly different for each row) 4263 or NULL, if d_nz is used to specify the nonzero structure. 4264 The size of this array is equal to the number of local rows, i.e 'm'. 4265 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4266 submatrix (same value is used for all local rows). 4267 - o_nnz - array containing the number of nonzeros in the various rows of the 4268 OFF-DIAGONAL portion of the local submatrix (possibly different for 4269 each row) or NULL, if o_nz is used to specify the nonzero 4270 structure. The size of this array is equal to the number 4271 of local rows, i.e 'm'. 4272 4273 Output Parameter: 4274 . A - the matrix 4275 4276 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4277 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4278 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4279 4280 Notes: 4281 If the *_nnz parameter is given then the *_nz parameter is ignored 4282 4283 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4284 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4285 storage requirements for this matrix. 4286 4287 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4288 processor than it must be used on all processors that share the object for 4289 that argument. 4290 4291 The user MUST specify either the local or global matrix dimensions 4292 (possibly both). 4293 4294 The parallel matrix is partitioned across processors such that the 4295 first m0 rows belong to process 0, the next m1 rows belong to 4296 process 1, the next m2 rows belong to process 2 etc.. where 4297 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4298 values corresponding to [m x N] submatrix. 4299 4300 The columns are logically partitioned with the n0 columns belonging 4301 to 0th partition, the next n1 columns belonging to the next 4302 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4303 4304 The DIAGONAL portion of the local submatrix on any given processor 4305 is the submatrix corresponding to the rows and columns m,n 4306 corresponding to the given processor. i.e diagonal matrix on 4307 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4308 etc. The remaining portion of the local submatrix [m x (N-n)] 4309 constitute the OFF-DIAGONAL portion. The example below better 4310 illustrates this concept. 4311 4312 For a square global matrix we define each processor's diagonal portion 4313 to be its local rows and the corresponding columns (a square submatrix); 4314 each processor's off-diagonal portion encompasses the remainder of the 4315 local matrix (a rectangular submatrix). 4316 4317 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4318 4319 When calling this routine with a single process communicator, a matrix of 4320 type SEQAIJ is returned. 
If a matrix of type MPIAIJ is desired for this 4321 type of communicator, use the construction mechanism 4322 .vb 4323 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4324 .ve 4325 4330 4331 By default, this format uses inodes (identical nodes) when possible. 4332 We search for consecutive rows with the same nonzero structure, thereby 4333 reusing matrix information to achieve increased efficiency. 4334 4335 Options Database Keys: 4336 + -mat_no_inode - Do not use inodes 4337 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4338 4339 4340 4341 Example usage: 4342 4343 Consider the following 8x8 matrix with 34 non-zero values that is 4344 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4345 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 4346 as follows: 4347 4348 .vb 4349 1 2 0 | 0 3 0 | 0 4 4350 Proc0 0 5 6 | 7 0 0 | 8 0 4351 9 0 10 | 11 0 0 | 12 0 4352 ------------------------------------- 4353 13 0 14 | 15 16 17 | 0 0 4354 Proc1 0 18 0 | 19 20 21 | 0 0 4355 0 0 0 | 22 23 0 | 24 0 4356 ------------------------------------- 4357 Proc2 25 26 27 | 0 0 28 | 29 0 4358 30 0 0 | 31 32 33 | 0 34 4359 .ve 4360 4361 This can be represented as a collection of submatrices as 4362 4363 .vb 4364 A B C 4365 D E F 4366 G H I 4367 .ve 4368 4369 where the submatrices A,B,C are owned by proc0, D,E,F are 4370 owned by proc1, and G,H,I are owned by proc2. 4371 4372 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4373 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4374 The 'M','N' parameters are 8,8, and have the same values on all procs. 4375 4376 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4377 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4378 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4379 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 4380 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ 4381 matrix and [DF] as another SeqAIJ matrix. 4382 4383 When the d_nz, o_nz parameters are specified, d_nz storage elements are 4384 allocated for every row of the local DIAGONAL submatrix, and o_nz 4385 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4386 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 4387 row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively. 4388 In this case, the values of d_nz,o_nz are 4389 .vb 4390 proc0 : d_nz = 2, o_nz = 2 4391 proc1 : d_nz = 3, o_nz = 2 4392 proc2 : d_nz = 1, o_nz = 4 4393 .ve 4394 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4395 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4396 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4397 34 values. 4398 4399 When the d_nnz, o_nnz parameters are specified, the storage is specified 4400 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 4401 In the above case the values for d_nnz,o_nnz are 4402 .vb 4403 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4404 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4405 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4406 .ve 4407 Here the space allocated is the sum of all of the above values, i.e. 34, and 4408 hence the preallocation is perfect.
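   As an illustration only (a minimal sketch, not part of the library; the d_nnz/o_nnz values are the ones worked
   out above, error checking is omitted, and each process would pass its own local sizes and arrays in the
   collective call), proc0 in the 8x8 example could create its part of the matrix with
.vb
      Mat      A;
      PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};

      MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
      ... insert values with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ...
      MatDestroy(&A);
.ve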
4409 4410 Level: intermediate 4411 4412 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4413 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4414 @*/ 4415 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4416 { 4417 PetscErrorCode ierr; 4418 PetscMPIInt size; 4419 4420 PetscFunctionBegin; 4421 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4422 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4423 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4424 if (size > 1) { 4425 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4426 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4427 } else { 4428 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4429 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4430 } 4431 PetscFunctionReturn(0); 4432 } 4433 4434 /*@C 4435 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4436 4437 Not Collective 4438 4439 Input Parameter: 4440 . A - The MPIAIJ matrix 4441 4442 Output Parameters: 4443 + Ad - The local diagonal block as a SeqAIJ matrix 4444 . Ao - The local off-diagonal block as a SeqAIJ matrix 4445 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4446 4447 Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4448 in Ad are in [0, Nc), where Nc is the number of local columns. The columns of Ao are in [0, Nco), where Nco is 4449 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4450 local column numbers to global column numbers in the original matrix.
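   Example usage (a minimal sketch, assuming A is an already assembled MATMPIAIJ matrix and error checking is
   omitted; Ad, Ao, and colmap remain owned by A and must not be destroyed or freed by the caller):
.vb
      Mat            Ad,Ao;
      const PetscInt *colmap;

      MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
      ... local column j of Ao corresponds to global column colmap[j] of A ...
.ve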
4451 4452 Level: intermediate 4453 4454 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ 4455 @*/ 4456 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4457 { 4458 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4459 PetscBool flg; 4460 PetscErrorCode ierr; 4461 4462 PetscFunctionBegin; 4463 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4464 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4465 if (Ad) *Ad = a->A; 4466 if (Ao) *Ao = a->B; 4467 if (colmap) *colmap = a->garray; 4468 PetscFunctionReturn(0); 4469 } 4470 4471 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4472 { 4473 PetscErrorCode ierr; 4474 PetscInt m,N,i,rstart,nnz,Ii; 4475 PetscInt *indx; 4476 PetscScalar *values; 4477 4478 PetscFunctionBegin; 4479 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4480 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4481 PetscInt *dnz,*onz,sum,bs,cbs; 4482 4483 if (n == PETSC_DECIDE) { 4484 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4485 } 4486 /* Check sum(n) = N */ 4487 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4488 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4489 4490 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 4491 rstart -= m; 4492 4493 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4494 for (i=0; i<m; i++) { 4495 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4496 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4497 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4498 } 4499 4500 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4501 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4502 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4503 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4504 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4505 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4506 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4507 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4508 ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 4509 } 4510 4511 /* numeric phase */ 4512 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4513 for (i=0; i<m; i++) { 4514 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4515 Ii = i + rstart; 4516 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4517 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4518 } 4519 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4520 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4521 PetscFunctionReturn(0); 4522 } 4523 4524 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4525 { 4526 PetscErrorCode ierr; 4527 PetscMPIInt rank; 4528 PetscInt m,N,i,rstart,nnz; 4529 size_t len; 4530 const PetscInt *indx; 4531 PetscViewer out; 4532 char *name; 4533 Mat B; 4534 const PetscScalar *values; 4535 4536 PetscFunctionBegin; 4537 ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr); 4538 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 4539 /* Should this be the type of the diagonal block of A? 
*/ 4540 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4541 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4542 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4543 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4544 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4545 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 4546 for (i=0; i<m; i++) { 4547 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4548 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4549 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4550 } 4551 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4552 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4553 4554 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr); 4555 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4556 ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr); 4557 ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr); 4558 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4559 ierr = PetscFree(name);CHKERRQ(ierr); 4560 ierr = MatView(B,out);CHKERRQ(ierr); 4561 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4562 ierr = MatDestroy(&B);CHKERRQ(ierr); 4563 PetscFunctionReturn(0); 4564 } 4565 4566 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4567 { 4568 PetscErrorCode ierr; 4569 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4570 4571 PetscFunctionBegin; 4572 if (!merge) PetscFunctionReturn(0); 4573 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4574 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4575 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4576 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4577 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4578 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4579 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4580 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4581 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4582 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4583 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4584 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4585 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4586 ierr = PetscFree(merge);CHKERRQ(ierr); 4587 PetscFunctionReturn(0); 4588 } 4589 4590 #include <../src/mat/utils/freespace.h> 4591 #include <petscbt.h> 4592 4593 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4594 { 4595 PetscErrorCode ierr; 4596 MPI_Comm comm; 4597 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4598 PetscMPIInt size,rank,taga,*len_s; 4599 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4600 PetscInt proc,m; 4601 PetscInt **buf_ri,**buf_rj; 4602 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4603 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4604 MPI_Request *s_waits,*r_waits; 4605 MPI_Status *status; 4606 MatScalar *aa=a->a; 4607 MatScalar **abuf_r,*ba_i; 4608 Mat_Merge_SeqsToMPI *merge; 4609 PetscContainer container; 4610 4611 PetscFunctionBegin; 4612 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4613 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4614 4615 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4616 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 4617 4618 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4619 if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4620 ierr = 
PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4621 4622 bi = merge->bi; 4623 bj = merge->bj; 4624 buf_ri = merge->buf_ri; 4625 buf_rj = merge->buf_rj; 4626 4627 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4628 owners = merge->rowmap->range; 4629 len_s = merge->len_s; 4630 4631 /* send and recv matrix values */ 4632 /*-----------------------------*/ 4633 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4634 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4635 4636 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4637 for (proc=0,k=0; proc<size; proc++) { 4638 if (!len_s[proc]) continue; 4639 i = owners[proc]; 4640 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRMPI(ierr); 4641 k++; 4642 } 4643 4644 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRMPI(ierr);} 4645 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRMPI(ierr);} 4646 ierr = PetscFree(status);CHKERRQ(ierr); 4647 4648 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4649 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4650 4651 /* insert mat values of mpimat */ 4652 /*----------------------------*/ 4653 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4654 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4655 4656 for (k=0; k<merge->nrecv; k++) { 4657 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4658 nrows = *(buf_ri_k[k]); 4659 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4660 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4661 } 4662 4663 /* set values of ba */ 4664 m = merge->rowmap->n; 4665 for (i=0; i<m; i++) { 4666 arow = owners[rank] + i; 4667 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4668 bnzi = bi[i+1] - bi[i]; 4669 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4670 4671 /* add local non-zero vals of this proc's seqmat into ba */ 4672 anzi = ai[arow+1] - ai[arow]; 4673 aj = a->j + ai[arow]; 4674 aa = a->a + ai[arow]; 4675 nextaj = 0; 4676 for (j=0; nextaj<anzi; j++) { 4677 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4678 ba_i[j] += aa[nextaj++]; 4679 } 4680 } 4681 4682 /* add received vals into ba */ 4683 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4684 /* i-th row */ 4685 if (i == *nextrow[k]) { 4686 anzi = *(nextai[k]+1) - *nextai[k]; 4687 aj = buf_rj[k] + *(nextai[k]); 4688 aa = abuf_r[k] + *(nextai[k]); 4689 nextaj = 0; 4690 for (j=0; nextaj<anzi; j++) { 4691 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4692 ba_i[j] += aa[nextaj++]; 4693 } 4694 } 4695 nextrow[k]++; nextai[k]++; 4696 } 4697 } 4698 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4699 } 4700 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4701 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4702 4703 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4704 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4705 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4706 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4707 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4708 PetscFunctionReturn(0); 4709 } 4710 4711 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4712 { 4713 PetscErrorCode ierr; 4714 Mat B_mpi; 4715 Mat_SeqAIJ 
*a=(Mat_SeqAIJ*)seqmat->data; 4716 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4717 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4718 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4719 PetscInt len,proc,*dnz,*onz,bs,cbs; 4720 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4721 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4722 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4723 MPI_Status *status; 4724 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4725 PetscBT lnkbt; 4726 Mat_Merge_SeqsToMPI *merge; 4727 PetscContainer container; 4728 4729 PetscFunctionBegin; 4730 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4731 4732 /* make sure it is a PETSc comm */ 4733 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4734 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4735 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 4736 4737 ierr = PetscNew(&merge);CHKERRQ(ierr); 4738 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4739 4740 /* determine row ownership */ 4741 /*---------------------------------------------------------*/ 4742 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4743 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4744 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4745 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4746 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4747 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4748 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4749 4750 m = merge->rowmap->n; 4751 owners = merge->rowmap->range; 4752 4753 /* determine the number of messages to send, their lengths */ 4754 /*---------------------------------------------------------*/ 4755 len_s = merge->len_s; 4756 4757 len = 0; /* length of buf_si[] */ 4758 merge->nsend = 0; 4759 for (proc=0; proc<size; proc++) { 4760 len_si[proc] = 0; 4761 if (proc == rank) { 4762 len_s[proc] = 0; 4763 } else { 4764 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4765 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4766 } 4767 if (len_s[proc]) { 4768 merge->nsend++; 4769 nrows = 0; 4770 for (i=owners[proc]; i<owners[proc+1]; i++) { 4771 if (ai[i+1] > ai[i]) nrows++; 4772 } 4773 len_si[proc] = 2*(nrows+1); 4774 len += len_si[proc]; 4775 } 4776 } 4777 4778 /* determine the number and length of messages to receive for ij-structure */ 4779 /*-------------------------------------------------------------------------*/ 4780 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4781 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4782 4783 /* post the Irecv of j-structure */ 4784 /*-------------------------------*/ 4785 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4786 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4787 4788 /* post the Isend of j-structure */ 4789 /*--------------------------------*/ 4790 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4791 4792 for (proc=0, k=0; proc<size; proc++) { 4793 if (!len_s[proc]) continue; 4794 i = owners[proc]; 4795 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRMPI(ierr); 4796 k++; 4797 } 4798 4799 /* receives and sends of j-structure are complete */ 4800 
/*------------------------------------------------*/ 4801 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRMPI(ierr);} 4802 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRMPI(ierr);} 4803 4804 /* send and recv i-structure */ 4805 /*---------------------------*/ 4806 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4807 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4808 4809 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4810 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4811 for (proc=0,k=0; proc<size; proc++) { 4812 if (!len_s[proc]) continue; 4813 /* form outgoing message for i-structure: 4814 buf_si[0]: nrows to be sent 4815 [1:nrows]: row index (global) 4816 [nrows+1:2*nrows+1]: i-structure index 4817 */ 4818 /*-------------------------------------------*/ 4819 nrows = len_si[proc]/2 - 1; 4820 buf_si_i = buf_si + nrows+1; 4821 buf_si[0] = nrows; 4822 buf_si_i[0] = 0; 4823 nrows = 0; 4824 for (i=owners[proc]; i<owners[proc+1]; i++) { 4825 anzi = ai[i+1] - ai[i]; 4826 if (anzi) { 4827 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4828 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4829 nrows++; 4830 } 4831 } 4832 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRMPI(ierr); 4833 k++; 4834 buf_si += len_si[proc]; 4835 } 4836 4837 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRMPI(ierr);} 4838 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRMPI(ierr);} 4839 4840 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4841 for (i=0; i<merge->nrecv; i++) { 4842 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4843 } 4844 4845 ierr = PetscFree(len_si);CHKERRQ(ierr); 4846 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4847 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4848 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4849 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4850 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4851 ierr = PetscFree(status);CHKERRQ(ierr); 4852 4853 /* compute a local seq matrix in each processor */ 4854 /*----------------------------------------------*/ 4855 /* allocate bi array and free space for accumulating nonzero column info */ 4856 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4857 bi[0] = 0; 4858 4859 /* create and initialize a linked list */ 4860 nlnk = N+1; 4861 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4862 4863 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4864 len = ai[owners[rank+1]] - ai[owners[rank]]; 4865 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4866 4867 current_space = free_space; 4868 4869 /* determine symbolic info for each local row */ 4870 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4871 4872 for (k=0; k<merge->nrecv; k++) { 4873 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4874 nrows = *buf_ri_k[k]; 4875 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4876 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4877 } 4878 4879 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4880 len = 0; 4881 for (i=0; i<m; i++) { 4882 bnzi = 0; 4883 /* add local non-zero cols of this proc's 
seqmat into lnk */ 4884 arow = owners[rank] + i; 4885 anzi = ai[arow+1] - ai[arow]; 4886 aj = a->j + ai[arow]; 4887 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4888 bnzi += nlnk; 4889 /* add received col data into lnk */ 4890 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4891 if (i == *nextrow[k]) { /* i-th row */ 4892 anzi = *(nextai[k]+1) - *nextai[k]; 4893 aj = buf_rj[k] + *nextai[k]; 4894 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4895 bnzi += nlnk; 4896 nextrow[k]++; nextai[k]++; 4897 } 4898 } 4899 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4900 4901 /* if free space is not available, make more free space */ 4902 if (current_space->local_remaining<bnzi) { 4903 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4904 nspacedouble++; 4905 } 4906 /* copy data into free space, then initialize lnk */ 4907 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4908 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4909 4910 current_space->array += bnzi; 4911 current_space->local_used += bnzi; 4912 current_space->local_remaining -= bnzi; 4913 4914 bi[i+1] = bi[i] + bnzi; 4915 } 4916 4917 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4918 4919 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4920 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4921 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4922 4923 /* create symbolic parallel matrix B_mpi */ 4924 /*---------------------------------------*/ 4925 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4926 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4927 if (n==PETSC_DECIDE) { 4928 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4929 } else { 4930 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4931 } 4932 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4933 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4934 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4935 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4936 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4937 4938 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4939 B_mpi->assembled = PETSC_FALSE; 4940 merge->bi = bi; 4941 merge->bj = bj; 4942 merge->buf_ri = buf_ri; 4943 merge->buf_rj = buf_rj; 4944 merge->coi = NULL; 4945 merge->coj = NULL; 4946 merge->owners_co = NULL; 4947 4948 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4949 4950 /* attach the supporting struct to B_mpi for reuse */ 4951 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4952 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4953 ierr = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr); 4954 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4955 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4956 *mpimat = B_mpi; 4957 4958 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4959 PetscFunctionReturn(0); 4960 } 4961 4962 /*@C 4963 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4964 matrices from each processor 4965 4966 Collective 4967 4968 Input Parameters: 4969 + comm - the communicators the parallel matrix will live on 4970 . seqmat - the input sequential matrices 4971 . 
m - number of local rows (or PETSC_DECIDE) 4972 . n - number of local columns (or PETSC_DECIDE) 4973 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4974 4975 Output Parameter: 4976 . mpimat - the parallel matrix generated 4977 4978 Level: advanced 4979 4980 Notes: 4981 The dimensions of the sequential matrix in each processor MUST be the same. 4982 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4983 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4984 @*/ 4985 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4986 { 4987 PetscErrorCode ierr; 4988 PetscMPIInt size; 4989 4990 PetscFunctionBegin; 4991 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4992 if (size == 1) { 4993 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4994 if (scall == MAT_INITIAL_MATRIX) { 4995 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4996 } else { 4997 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4998 } 4999 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5000 PetscFunctionReturn(0); 5001 } 5002 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5003 if (scall == MAT_INITIAL_MATRIX) { 5004 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5005 } 5006 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5007 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5008 PetscFunctionReturn(0); 5009 } 5010 5011 /*@ 5012 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5013 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5014 with MatGetSize() 5015 5016 Not Collective 5017 5018 Input Parameters: 5019 + A - the matrix 5020 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5021 5022 Output Parameter: 5023 . A_loc - the local sequential matrix generated 5024 5025 Level: developer 5026 5027 Notes: 5028 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 5029 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 5030 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 5031 modify the values of the returned A_loc. 
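    Example usage (a minimal sketch, assuming A is an assembled MATMPIAIJ matrix and error checking is omitted):
.vb
      Mat A_loc;

      MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
      ... use A_loc ...
      MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);   ... refresh the values after A has changed ...
      MatDestroy(&A_loc);
.ve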
5032 5033 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge() 5034 @*/ 5035 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5036 { 5037 PetscErrorCode ierr; 5038 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5039 Mat_SeqAIJ *mat,*a,*b; 5040 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5041 const PetscScalar *aa,*ba,*aav,*bav; 5042 PetscScalar *ca,*cam; 5043 PetscMPIInt size; 5044 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5045 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5046 PetscBool match; 5047 5048 PetscFunctionBegin; 5049 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5050 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5051 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 5052 if (size == 1) { 5053 if (scall == MAT_INITIAL_MATRIX) { 5054 ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr); 5055 *A_loc = mpimat->A; 5056 } else if (scall == MAT_REUSE_MATRIX) { 5057 ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5058 } 5059 PetscFunctionReturn(0); 5060 } 5061 5062 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5063 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5064 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5065 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5066 ierr = MatSeqAIJGetArrayRead(mpimat->A,&aav);CHKERRQ(ierr); 5067 ierr = MatSeqAIJGetArrayRead(mpimat->B,&bav);CHKERRQ(ierr); 5068 aa = aav; 5069 ba = bav; 5070 if (scall == MAT_INITIAL_MATRIX) { 5071 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5072 ci[0] = 0; 5073 for (i=0; i<am; i++) { 5074 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5075 } 5076 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5077 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5078 k = 0; 5079 for (i=0; i<am; i++) { 5080 ncols_o = bi[i+1] - bi[i]; 5081 ncols_d = ai[i+1] - ai[i]; 5082 /* off-diagonal portion of A */ 5083 for (jo=0; jo<ncols_o; jo++) { 5084 col = cmap[*bj]; 5085 if (col >= cstart) break; 5086 cj[k] = col; bj++; 5087 ca[k++] = *ba++; 5088 } 5089 /* diagonal portion of A */ 5090 for (j=0; j<ncols_d; j++) { 5091 cj[k] = cstart + *aj++; 5092 ca[k++] = *aa++; 5093 } 5094 /* off-diagonal portion of A */ 5095 for (j=jo; j<ncols_o; j++) { 5096 cj[k] = cmap[*bj++]; 5097 ca[k++] = *ba++; 5098 } 5099 } 5100 /* put together the new matrix */ 5101 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5102 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5103 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5104 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5105 mat->free_a = PETSC_TRUE; 5106 mat->free_ij = PETSC_TRUE; 5107 mat->nonew = 0; 5108 } else if (scall == MAT_REUSE_MATRIX) { 5109 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5110 #if defined(PETSC_USE_DEVICE) 5111 (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU; 5112 #endif 5113 ci = mat->i; cj = mat->j; cam = mat->a; 5114 for (i=0; i<am; i++) { 5115 /* off-diagonal portion of A */ 5116 ncols_o = bi[i+1] - bi[i]; 5117 for (jo=0; jo<ncols_o; jo++) { 5118 col = cmap[*bj]; 5119 if (col >= cstart) break; 5120 *cam++ = *ba++; bj++; 5121 } 5122 /* diagonal portion of A */ 5123 ncols_d = ai[i+1] - ai[i]; 5124 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5125 /* off-diagonal portion of A */ 5126 for (j=jo; j<ncols_o; j++) { 5127 *cam++ = *ba++; bj++; 5128 } 5129 } 5130 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5131 ierr = MatSeqAIJRestoreArrayRead(mpimat->A,&aav);CHKERRQ(ierr); 5132 ierr = MatSeqAIJRestoreArrayRead(mpimat->B,&bav);CHKERRQ(ierr); 5133 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5134 PetscFunctionReturn(0); 5135 } 5136 5137 /*@ 5138 MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5139 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part 5140 5141 Not Collective 5142 5143 Input Parameters: 5144 + A - the matrix 5145 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5146 5147 Output Parameter: 5148 + glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5149 - A_loc - the local sequential matrix generated 5150 5151 Level: developer 5152 5153 Notes: 5154 This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering) 5155 5156 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed() 5157 5158 @*/ 5159 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc) 5160 { 5161 PetscErrorCode ierr; 5162 Mat Ao,Ad; 5163 const PetscInt *cmap; 5164 PetscMPIInt size; 5165 PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*); 5166 5167 PetscFunctionBegin; 5168 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap);CHKERRQ(ierr); 5169 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 5170 if (size == 1) { 5171 if (scall == MAT_INITIAL_MATRIX) { 5172 ierr = PetscObjectReference((PetscObject)Ad);CHKERRQ(ierr); 5173 *A_loc = Ad; 5174 } else if (scall == MAT_REUSE_MATRIX) { 5175 ierr = MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5176 } 5177 if (glob) { ierr = ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob);CHKERRQ(ierr); } 5178 PetscFunctionReturn(0); 5179 } 5180 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f);CHKERRQ(ierr); 5181 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5182 if (f) { 5183 ierr = (*f)(A,scall,glob,A_loc);CHKERRQ(ierr); 5184 } else { 5185 Mat_SeqAIJ *a = (Mat_SeqAIJ*)Ad->data; 5186 Mat_SeqAIJ *b = (Mat_SeqAIJ*)Ao->data; 5187 Mat_SeqAIJ *c; 5188 PetscInt *ai = a->i, *aj = a->j; 5189 PetscInt *bi = b->i, *bj = b->j; 5190 PetscInt *ci,*cj; 5191 const PetscScalar *aa,*ba; 5192 PetscScalar *ca; 5193 PetscInt i,j,am,dn,on; 5194 5195 ierr = 
MatGetLocalSize(Ad,&am,&dn);CHKERRQ(ierr); 5196 ierr = MatGetLocalSize(Ao,NULL,&on);CHKERRQ(ierr); 5197 ierr = MatSeqAIJGetArrayRead(Ad,&aa);CHKERRQ(ierr); 5198 ierr = MatSeqAIJGetArrayRead(Ao,&ba);CHKERRQ(ierr); 5199 if (scall == MAT_INITIAL_MATRIX) { 5200 PetscInt k; 5201 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5202 ierr = PetscMalloc1(ai[am]+bi[am],&cj);CHKERRQ(ierr); 5203 ierr = PetscMalloc1(ai[am]+bi[am],&ca);CHKERRQ(ierr); 5204 ci[0] = 0; 5205 for (i=0,k=0; i<am; i++) { 5206 const PetscInt ncols_o = bi[i+1] - bi[i]; 5207 const PetscInt ncols_d = ai[i+1] - ai[i]; 5208 ci[i+1] = ci[i] + ncols_o + ncols_d; 5209 /* diagonal portion of A */ 5210 for (j=0; j<ncols_d; j++,k++) { 5211 cj[k] = *aj++; 5212 ca[k] = *aa++; 5213 } 5214 /* off-diagonal portion of A */ 5215 for (j=0; j<ncols_o; j++,k++) { 5216 cj[k] = dn + *bj++; 5217 ca[k] = *ba++; 5218 } 5219 } 5220 /* put together the new matrix */ 5221 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc);CHKERRQ(ierr); 5222 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5223 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5224 c = (Mat_SeqAIJ*)(*A_loc)->data; 5225 c->free_a = PETSC_TRUE; 5226 c->free_ij = PETSC_TRUE; 5227 c->nonew = 0; 5228 ierr = MatSetType(*A_loc,((PetscObject)Ad)->type_name);CHKERRQ(ierr); 5229 } else if (scall == MAT_REUSE_MATRIX) { 5230 #if defined(PETSC_HAVE_DEVICE) 5231 (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU; 5232 #endif 5233 c = (Mat_SeqAIJ*)(*A_loc)->data; 5234 ca = c->a; 5235 for (i=0; i<am; i++) { 5236 const PetscInt ncols_d = ai[i+1] - ai[i]; 5237 const PetscInt ncols_o = bi[i+1] - bi[i]; 5238 /* diagonal portion of A */ 5239 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 5240 /* off-diagonal portion of A */ 5241 for (j=0; j<ncols_o; j++) *ca++ = *ba++; 5242 } 5243 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5244 ierr = MatSeqAIJRestoreArrayRead(Ad,&aa);CHKERRQ(ierr); 5245 ierr = MatSeqAIJRestoreArrayRead(Ao,&aa);CHKERRQ(ierr); 5246 if (glob) { 5247 PetscInt cst, *gidx; 5248 5249 ierr = MatGetOwnershipRangeColumn(A,&cst,NULL);CHKERRQ(ierr); 5250 ierr = PetscMalloc1(dn+on,&gidx);CHKERRQ(ierr); 5251 for (i=0; i<dn; i++) gidx[i] = cst + i; 5252 for (i=0; i<on; i++) gidx[i+dn] = cmap[i]; 5253 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob);CHKERRQ(ierr); 5254 } 5255 } 5256 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5257 PetscFunctionReturn(0); 5258 } 5259 5260 /*@C 5261 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5262 5263 Not Collective 5264 5265 Input Parameters: 5266 + A - the matrix 5267 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5268 - row, col - index sets of rows and columns to extract (or NULL) 5269 5270 Output Parameter: 5271 . 
A_loc - the local sequential matrix generated 5272 5273 Level: developer 5274 5275 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5276 5277 @*/ 5278 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5279 { 5280 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5281 PetscErrorCode ierr; 5282 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5283 IS isrowa,iscola; 5284 Mat *aloc; 5285 PetscBool match; 5286 5287 PetscFunctionBegin; 5288 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5289 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5290 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5291 if (!row) { 5292 start = A->rmap->rstart; end = A->rmap->rend; 5293 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5294 } else { 5295 isrowa = *row; 5296 } 5297 if (!col) { 5298 start = A->cmap->rstart; 5299 cmap = a->garray; 5300 nzA = a->A->cmap->n; 5301 nzB = a->B->cmap->n; 5302 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5303 ncols = 0; 5304 for (i=0; i<nzB; i++) { 5305 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5306 else break; 5307 } 5308 imark = i; 5309 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5310 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5311 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5312 } else { 5313 iscola = *col; 5314 } 5315 if (scall != MAT_INITIAL_MATRIX) { 5316 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5317 aloc[0] = *A_loc; 5318 } 5319 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5320 if (!col) { /* attach global id of condensed columns */ 5321 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5322 } 5323 *A_loc = aloc[0]; 5324 ierr = PetscFree(aloc);CHKERRQ(ierr); 5325 if (!row) { 5326 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5327 } 5328 if (!col) { 5329 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5330 } 5331 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5332 PetscFunctionReturn(0); 5333 } 5334 5335 /* 5336 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5337 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5338 * on a global size. 
5339 * */ 5340 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5341 { 5342 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5343 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5344 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5345 PetscMPIInt owner; 5346 PetscSFNode *iremote,*oiremote; 5347 const PetscInt *lrowindices; 5348 PetscErrorCode ierr; 5349 PetscSF sf,osf; 5350 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5351 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5352 MPI_Comm comm; 5353 ISLocalToGlobalMapping mapping; 5354 5355 PetscFunctionBegin; 5356 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr); 5357 /* plocalsize is the number of roots 5358 * nrows is the number of leaves 5359 * */ 5360 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr); 5361 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr); 5362 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr); 5363 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr); 5364 for (i=0;i<nrows;i++) { 5365 /* Find a remote index and an owner for a row 5366 * The row could be local or remote 5367 * */ 5368 owner = 0; 5369 lidx = 0; 5370 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr); 5371 iremote[i].index = lidx; 5372 iremote[i].rank = owner; 5373 } 5374 /* Create SF to communicate how many nonzero columns for each row */ 5375 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5376 /* SF will figure out the number of nonzero colunms for each row, and their 5377 * offsets 5378 * */ 5379 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5380 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5381 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5382 5383 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); 5384 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); 5385 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); 5386 roffsets[0] = 0; 5387 roffsets[1] = 0; 5388 for (i=0;i<plocalsize;i++) { 5389 /* diag */ 5390 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5391 /* off diag */ 5392 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5393 /* compute offsets so that we relative location for each row */ 5394 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5395 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5396 } 5397 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr); 5398 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr); 5399 /* 'r' means root, and 'l' means leaf */ 5400 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr); 5401 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr); 5402 ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr); 5403 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr); 5404 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5405 ierr = PetscFree(roffsets);CHKERRQ(ierr); 5406 ierr = PetscFree(nrcols);CHKERRQ(ierr); 5407 dntotalcols = 0; 5408 ontotalcols = 0; 5409 ncol = 0; 5410 for (i=0;i<nrows;i++) { 5411 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5412 ncol = PetscMax(pnnz[i],ncol); 5413 /* diag */ 5414 dntotalcols += nlcols[i*2+0]; 5415 /* off diag */ 5416 ontotalcols += nlcols[i*2+1]; 5417 } 5418 /* We do not need to figure the right number of columns 5419 * since all the calculations will be done by going through the raw data 5420 * */ 5421 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr); 5422 ierr = 
MatSetUp(*P_oth);CHKERRQ(ierr); 5423 ierr = PetscFree(pnnz);CHKERRQ(ierr); 5424 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5425 /* diag */ 5426 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr); 5427 /* off diag */ 5428 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr); 5429 /* diag */ 5430 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr); 5431 /* off diag */ 5432 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr); 5433 dntotalcols = 0; 5434 ontotalcols = 0; 5435 ntotalcols = 0; 5436 for (i=0;i<nrows;i++) { 5437 owner = 0; 5438 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr); 5439 /* Set iremote for diag matrix */ 5440 for (j=0;j<nlcols[i*2+0];j++) { 5441 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5442 iremote[dntotalcols].rank = owner; 5443 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5444 ilocal[dntotalcols++] = ntotalcols++; 5445 } 5446 /* off diag */ 5447 for (j=0;j<nlcols[i*2+1];j++) { 5448 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5449 oiremote[ontotalcols].rank = owner; 5450 oilocal[ontotalcols++] = ntotalcols++; 5451 } 5452 } 5453 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr); 5454 ierr = PetscFree(loffsets);CHKERRQ(ierr); 5455 ierr = PetscFree(nlcols);CHKERRQ(ierr); 5456 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5457 /* P serves as roots and P_oth is leaves 5458 * Diag matrix 5459 * */ 5460 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5461 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5462 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5463 5464 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr); 5465 /* Off diag */ 5466 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5467 ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr); 5468 ierr = PetscSFSetUp(osf);CHKERRQ(ierr); 5469 /* We operate on the matrix internal data for saving memory */ 5470 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5471 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5472 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr); 5473 /* Convert to global indices for diag matrix */ 5474 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5475 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5476 /* We want P_oth store global indices */ 5477 ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr); 5478 /* Use memory scalable approach */ 5479 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr); 5480 ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); 5481 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5482 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5483 /* Convert back to local indices */ 5484 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5485 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5486 nout = 0; 5487 ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr); 5488 if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout); 5489 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 
5490 /* Exchange values */ 5491 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5492 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5493 /* Stop PETSc from shrinking memory */ 5494 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5495 ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5496 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5497 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5498 ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5499 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5500 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5501 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5502 PetscFunctionReturn(0); 5503 } 5504 5505 /* 5506 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5507 * This supports MPIAIJ and MAIJ 5508 * */ 5509 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5510 { 5511 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5512 Mat_SeqAIJ *p_oth; 5513 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 5514 IS rows,map; 5515 PetscHMapI hamp; 5516 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5517 MPI_Comm comm; 5518 PetscSF sf,osf; 5519 PetscBool has; 5520 PetscErrorCode ierr; 5521 5522 PetscFunctionBegin; 5523 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5524 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5525 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5526 * and then create a submatrix (that often is an overlapping matrix) 5527 * */ 5528 if (reuse == MAT_INITIAL_MATRIX) { 5529 /* Use a hash table to figure out unique keys */ 5530 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5531 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5532 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5533 count = 0; 5534 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5535 for (i=0;i<a->B->cmap->n;i++) { 5536 key = a->garray[i]/dof; 5537 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5538 if (!has) { 5539 mapping[i] = count; 5540 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5541 } else { 5542 /* Current 'i' has the same value the previous step */ 5543 mapping[i] = count-1; 5544 } 5545 } 5546 ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5547 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5548 if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr); 5549 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5550 off = 0; 5551 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5552 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5553 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5554 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5555 /* In case, the matrix was already created but users want to recreate the matrix */ 5556 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5557 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5558 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5559 ierr = ISDestroy(&map);CHKERRQ(ierr); 5560 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5561 } 
else if (reuse == MAT_REUSE_MATRIX) { 5562 /* If matrix was already created, we simply update values using SF objects 5563 * that as attached to the matrix ealier. 5564 * */ 5565 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5566 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5567 if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet"); 5568 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5569 /* Update values in place */ 5570 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5571 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5572 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5573 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5574 } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type"); 5575 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5576 PetscFunctionReturn(0); 5577 } 5578 5579 /*@C 5580 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5581 5582 Collective on Mat 5583 5584 Input Parameters: 5585 + A,B - the matrices in mpiaij format 5586 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5587 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5588 5589 Output Parameter: 5590 + rowb, colb - index sets of rows and columns of B to extract 5591 - B_seq - the sequential matrix generated 5592 5593 Level: developer 5594 5595 @*/ 5596 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5597 { 5598 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5599 PetscErrorCode ierr; 5600 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5601 IS isrowb,iscolb; 5602 Mat *bseq=NULL; 5603 5604 PetscFunctionBegin; 5605 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5606 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5607 } 5608 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5609 5610 if (scall == MAT_INITIAL_MATRIX) { 5611 start = A->cmap->rstart; 5612 cmap = a->garray; 5613 nzA = a->A->cmap->n; 5614 nzB = a->B->cmap->n; 5615 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5616 ncols = 0; 5617 for (i=0; i<nzB; i++) { /* row < local row index */ 5618 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5619 else break; 5620 } 5621 imark = i; 5622 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5623 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5624 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5625 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5626 } else { 5627 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5628 isrowb = *rowb; iscolb = *colb; 5629 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5630 bseq[0] = *B_seq; 5631 } 5632 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5633 *B_seq = bseq[0]; 5634 ierr = PetscFree(bseq);CHKERRQ(ierr); 5635 if (!rowb) { 5636 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5637 } else { 5638 *rowb = isrowb; 5639 } 5640 if (!colb) { 5641 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5642 } else { 5643 *colb = iscolb; 5644 } 
5645 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5646 PetscFunctionReturn(0); 5647 } 5648 5649 /* 5650 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5651 of the OFF-DIAGONAL portion of local A 5652 5653 Collective on Mat 5654 5655 Input Parameters: 5656 + A,B - the matrices in mpiaij format 5657 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5658 5659 Output Parameter: 5660 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5661 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5662 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5663 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5664 5665 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5666 for this matrix. This is not desirable.. 5667 5668 Level: developer 5669 5670 */ 5671 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5672 { 5673 PetscErrorCode ierr; 5674 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5675 Mat_SeqAIJ *b_oth; 5676 VecScatter ctx; 5677 MPI_Comm comm; 5678 const PetscMPIInt *rprocs,*sprocs; 5679 const PetscInt *srow,*rstarts,*sstarts; 5680 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5681 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5682 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5683 MPI_Request *rwaits = NULL,*swaits = NULL; 5684 MPI_Status rstatus; 5685 PetscMPIInt size,tag,rank,nsends_mpi,nrecvs_mpi; 5686 PETSC_UNUSED PetscMPIInt jj; 5687 5688 PetscFunctionBegin; 5689 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5690 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 5691 5692 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5693 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5694 } 5695 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5696 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 5697 5698 if (size == 1) { 5699 startsj_s = NULL; 5700 bufa_ptr = NULL; 5701 *B_oth = NULL; 5702 PetscFunctionReturn(0); 5703 } 5704 5705 ctx = a->Mvctx; 5706 tag = ((PetscObject)ctx)->tag; 5707 5708 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5709 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5710 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5711 ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5712 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5713 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5714 5715 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5716 if (scall == MAT_INITIAL_MATRIX) { 5717 /* i-array */ 5718 /*---------*/ 5719 /* post receives */ 5720 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5721 for (i=0; i<nrecvs; i++) { 5722 rowlen = rvalues + rstarts[i]*rbs; 5723 nrows 
= (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5724 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5725 } 5726 5727 /* pack the outgoing message */ 5728 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5729 5730 sstartsj[0] = 0; 5731 rstartsj[0] = 0; 5732 len = 0; /* total length of j or a array to be sent */ 5733 if (nsends) { 5734 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5735 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5736 } 5737 for (i=0; i<nsends; i++) { 5738 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5739 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5740 for (j=0; j<nrows; j++) { 5741 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5742 for (l=0; l<sbs; l++) { 5743 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5744 5745 rowlen[j*sbs+l] = ncols; 5746 5747 len += ncols; 5748 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5749 } 5750 k++; 5751 } 5752 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5753 5754 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5755 } 5756 /* recvs and sends of i-array are completed */ 5757 i = nrecvs; 5758 while (i--) { 5759 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr); 5760 } 5761 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5762 ierr = PetscFree(svalues);CHKERRQ(ierr); 5763 5764 /* allocate buffers for sending j and a arrays */ 5765 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5766 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5767 5768 /* create i-array of B_oth */ 5769 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5770 5771 b_othi[0] = 0; 5772 len = 0; /* total length of j or a array to be received */ 5773 k = 0; 5774 for (i=0; i<nrecvs; i++) { 5775 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5776 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5777 for (j=0; j<nrows; j++) { 5778 b_othi[k+1] = b_othi[k] + rowlen[j]; 5779 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5780 k++; 5781 } 5782 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5783 } 5784 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5785 5786 /* allocate space for j and a arrrays of B_oth */ 5787 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5788 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5789 5790 /* j-array */ 5791 /*---------*/ 5792 /* post receives of j-array */ 5793 for (i=0; i<nrecvs; i++) { 5794 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5795 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5796 } 5797 5798 /* pack the outgoing message j-array */ 5799 if (nsends) k = sstarts[0]; 5800 for (i=0; i<nsends; i++) { 5801 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5802 bufJ = bufj+sstartsj[i]; 5803 for (j=0; j<nrows; j++) { 5804 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5805 for (ll=0; ll<sbs; ll++) { 5806 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5807 for (l=0; l<ncols; l++) { 5808 *bufJ++ = cols[l]; 5809 } 5810 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5811 } 5812 } 5813 ierr = 
MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5814 } 5815 5816 /* recvs and sends of j-array are completed */ 5817 i = nrecvs; 5818 while (i--) { 5819 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr); 5820 } 5821 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5822 } else if (scall == MAT_REUSE_MATRIX) { 5823 sstartsj = *startsj_s; 5824 rstartsj = *startsj_r; 5825 bufa = *bufa_ptr; 5826 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5827 b_otha = b_oth->a; 5828 #if defined(PETSC_HAVE_DEVICE) 5829 (*B_oth)->offloadmask = PETSC_OFFLOAD_CPU; 5830 #endif 5831 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5832 5833 /* a-array */ 5834 /*---------*/ 5835 /* post receives of a-array */ 5836 for (i=0; i<nrecvs; i++) { 5837 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5838 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5839 } 5840 5841 /* pack the outgoing message a-array */ 5842 if (nsends) k = sstarts[0]; 5843 for (i=0; i<nsends; i++) { 5844 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5845 bufA = bufa+sstartsj[i]; 5846 for (j=0; j<nrows; j++) { 5847 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5848 for (ll=0; ll<sbs; ll++) { 5849 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5850 for (l=0; l<ncols; l++) { 5851 *bufA++ = vals[l]; 5852 } 5853 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5854 } 5855 } 5856 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5857 } 5858 /* recvs and sends of a-array are completed */ 5859 i = nrecvs; 5860 while (i--) { 5861 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr); 5862 } 5863 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5864 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5865 5866 if (scall == MAT_INITIAL_MATRIX) { 5867 /* put together the new matrix */ 5868 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5869 5870 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5871 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5872 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5873 b_oth->free_a = PETSC_TRUE; 5874 b_oth->free_ij = PETSC_TRUE; 5875 b_oth->nonew = 0; 5876 5877 ierr = PetscFree(bufj);CHKERRQ(ierr); 5878 if (!startsj_s || !bufa_ptr) { 5879 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5880 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5881 } else { 5882 *startsj_s = sstartsj; 5883 *startsj_r = rstartsj; 5884 *bufa_ptr = bufa; 5885 } 5886 } 5887 5888 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5889 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5890 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5891 PetscFunctionReturn(0); 5892 } 5893 5894 /*@C 5895 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5896 5897 Not Collective 5898 5899 Input Parameters: 5900 . 
A - The matrix in mpiaij format 5901 5902 Output Parameter: 5903 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5904 . colmap - A map from global column index to local index into lvec 5905 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5906 5907 Level: developer 5908 5909 @*/ 5910 #if defined(PETSC_USE_CTABLE) 5911 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5912 #else 5913 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5914 #endif 5915 { 5916 Mat_MPIAIJ *a; 5917 5918 PetscFunctionBegin; 5919 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5920 PetscValidPointer(lvec, 2); 5921 PetscValidPointer(colmap, 3); 5922 PetscValidPointer(multScatter, 4); 5923 a = (Mat_MPIAIJ*) A->data; 5924 if (lvec) *lvec = a->lvec; 5925 if (colmap) *colmap = a->colmap; 5926 if (multScatter) *multScatter = a->Mvctx; 5927 PetscFunctionReturn(0); 5928 } 5929 5930 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5931 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5932 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5933 #if defined(PETSC_HAVE_MKL_SPARSE) 5934 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5935 #endif 5936 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5937 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5938 #if defined(PETSC_HAVE_ELEMENTAL) 5939 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5940 #endif 5941 #if defined(PETSC_HAVE_SCALAPACK) 5942 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 5943 #endif 5944 #if defined(PETSC_HAVE_HYPRE) 5945 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5946 #endif 5947 #if defined(PETSC_HAVE_CUDA) 5948 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 5949 #endif 5950 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 5951 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*); 5952 #endif 5953 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5954 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5955 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 5956 5957 /* 5958 Computes (B'*A')' since computing B*A directly is untenable 5959 5960 n p p 5961 [ ] [ ] [ ] 5962 m [ A ] * n [ B ] = m [ C ] 5963 [ ] [ ] [ ] 5964 5965 */ 5966 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5967 { 5968 PetscErrorCode ierr; 5969 Mat At,Bt,Ct; 5970 5971 PetscFunctionBegin; 5972 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5973 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5974 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr); 5975 ierr = MatDestroy(&At);CHKERRQ(ierr); 5976 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5977 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5978 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5979 PetscFunctionReturn(0); 5980 } 5981 5982 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 5983 { 5984 PetscErrorCode ierr; 5985 PetscBool cisdense; 5986 5987 PetscFunctionBegin; 5988 if (A->cmap->n != 
B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5989 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 5990 ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 5991 ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr); 5992 if (!cisdense) { 5993 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 5994 } 5995 ierr = MatSetUp(C);CHKERRQ(ierr); 5996 5997 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5998 PetscFunctionReturn(0); 5999 } 6000 6001 /* ----------------------------------------------------------------*/ 6002 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6003 { 6004 Mat_Product *product = C->product; 6005 Mat A = product->A,B=product->B; 6006 6007 PetscFunctionBegin; 6008 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 6009 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 6010 6011 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6012 C->ops->productsymbolic = MatProductSymbolic_AB; 6013 PetscFunctionReturn(0); 6014 } 6015 6016 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6017 { 6018 PetscErrorCode ierr; 6019 Mat_Product *product = C->product; 6020 6021 PetscFunctionBegin; 6022 if (product->type == MATPRODUCT_AB) { 6023 ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr); 6024 } 6025 PetscFunctionReturn(0); 6026 } 6027 /* ----------------------------------------------------------------*/ 6028 6029 /*MC 6030 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6031 6032 Options Database Keys: 6033 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 6034 6035 Level: beginner 6036 6037 Notes: 6038 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values, 6039 in this case the values associated with the rows and columns one passes in are set to zero 6040 in the matrix 6041 6042 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. 
In this no 6043 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 6044 6045 .seealso: MatCreateAIJ() 6046 M*/ 6047 6048 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6049 { 6050 Mat_MPIAIJ *b; 6051 PetscErrorCode ierr; 6052 PetscMPIInt size; 6053 6054 PetscFunctionBegin; 6055 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 6056 6057 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 6058 B->data = (void*)b; 6059 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 6060 B->assembled = PETSC_FALSE; 6061 B->insertmode = NOT_SET_VALUES; 6062 b->size = size; 6063 6064 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr); 6065 6066 /* build cache for off array entries formed */ 6067 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 6068 6069 b->donotstash = PETSC_FALSE; 6070 b->colmap = NULL; 6071 b->garray = NULL; 6072 b->roworiented = PETSC_TRUE; 6073 6074 /* stuff used for matrix vector multiply */ 6075 b->lvec = NULL; 6076 b->Mvctx = NULL; 6077 6078 /* stuff for MatGetRow() */ 6079 b->rowindices = NULL; 6080 b->rowvalues = NULL; 6081 b->getrowactive = PETSC_FALSE; 6082 6083 /* flexible pointer used in CUSPARSE classes */ 6084 b->spptr = NULL; 6085 6086 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 6087 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 6088 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 6089 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 6090 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 6091 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 6092 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 6093 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 6094 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 6095 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr); 6096 #if defined(PETSC_HAVE_CUDA) 6097 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr); 6098 #endif 6099 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6100 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr); 6101 #endif 6102 #if defined(PETSC_HAVE_MKL_SPARSE) 6103 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 6104 #endif 6105 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 6106 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr); 6107 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 6108 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);CHKERRQ(ierr); 6109 #if defined(PETSC_HAVE_ELEMENTAL) 6110 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 6111 #endif 6112 #if defined(PETSC_HAVE_SCALAPACK) 6113 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr); 6114 #endif 6115 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 6116 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 6117 #if defined(PETSC_HAVE_HYPRE) 6118 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 6119 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr); 6120 #endif 6121 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr); 6122 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr); 6123 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 6124 PetscFunctionReturn(0); 6125 } 6126 6127 /*@C 6128 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 6129 and "off-diagonal" part of the matrix in CSR format. 6130 6131 Collective 6132 6133 Input Parameters: 6134 + comm - MPI communicator 6135 . m - number of local rows (Cannot be PETSC_DECIDE) 6136 . n - This value should be the same as the local size used in creating the 6137 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 6138 calculated if N is given) For square matrices n is almost always m. 6139 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 6140 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 6141 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6142 . j - column indices 6143 . a - matrix values 6144 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6145 . oj - column indices 6146 - oa - matrix values 6147 6148 Output Parameter: 6149 . mat - the matrix 6150 6151 Level: advanced 6152 6153 Notes: 6154 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6155 must free the arrays once the matrix has been destroyed and not before. 6156 6157 The i and j indices are 0 based 6158 6159 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6160 6161 This sets local rows and cannot be used to set off-processor values. 6162 6163 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6164 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6165 not easily support in-place reassembly. 
It is recommended to use MatSetValues() (or a variant thereof) because 6166 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6167 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6168 communication if it is known that only local entries will be set. 6169 6170 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 6171 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 6172 @*/ 6173 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 6174 { 6175 PetscErrorCode ierr; 6176 Mat_MPIAIJ *maij; 6177 6178 PetscFunctionBegin; 6179 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6180 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 6181 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 6182 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 6183 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 6184 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 6185 maij = (Mat_MPIAIJ*) (*mat)->data; 6186 6187 (*mat)->preallocated = PETSC_TRUE; 6188 6189 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 6190 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 6191 6192 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 6193 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 6194 6195 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6196 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6197 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6198 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6199 6200 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 6201 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6202 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6203 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 6204 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6205 PetscFunctionReturn(0); 6206 } 6207 6208 /* 6209 Special version for direct calls from Fortran 6210 */ 6211 #include <petsc/private/fortranimpl.h> 6212 6213 /* Change these macros so can be used in void function */ 6214 #undef CHKERRQ 6215 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 6216 #undef SETERRQ2 6217 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 6218 #undef SETERRQ3 6219 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 6220 #undef SETERRQ 6221 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 6222 6223 #if defined(PETSC_HAVE_FORTRAN_CAPS) 6224 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 6225 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 6226 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 6227 #else 6228 #endif 6229 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 6230 { 6231 Mat mat = *mmat; 6232 PetscInt m = *mm, n = *mn; 6233 InsertMode addv = *maddv; 6234 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 6235 PetscScalar value; 6236 
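  /* Note (descriptive comment, summarizing the body below): this routine mirrors MatSetValues_MPIAIJ().
     Entries whose global row is owned by this process are inserted directly into the diagonal (aij->A)
     or off-diagonal (aij->B) SeqAIJ block through the MatSetValues_SeqAIJ_A_Private() /
     MatSetValues_SeqAIJ_B_Private() macros; rows owned by other processes are placed in the stash
     (unless donotstash is set) and communicated at assembly time. */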
PetscErrorCode ierr; 6237 6238 MatCheckPreallocated(mat,1); 6239 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 6240 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 6241 { 6242 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 6243 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 6244 PetscBool roworiented = aij->roworiented; 6245 6246 /* Some Variables required in the macro */ 6247 Mat A = aij->A; 6248 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 6249 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 6250 MatScalar *aa = a->a; 6251 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 6252 Mat B = aij->B; 6253 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 6254 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 6255 MatScalar *ba = b->a; 6256 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 6257 * cannot use "#if defined" inside a macro. */ 6258 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 6259 6260 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 6261 PetscInt nonew = a->nonew; 6262 MatScalar *ap1,*ap2; 6263 6264 PetscFunctionBegin; 6265 for (i=0; i<m; i++) { 6266 if (im[i] < 0) continue; 6267 if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 6268 if (im[i] >= rstart && im[i] < rend) { 6269 row = im[i] - rstart; 6270 lastcol1 = -1; 6271 rp1 = aj + ai[row]; 6272 ap1 = aa + ai[row]; 6273 rmax1 = aimax[row]; 6274 nrow1 = ailen[row]; 6275 low1 = 0; 6276 high1 = nrow1; 6277 lastcol2 = -1; 6278 rp2 = bj + bi[row]; 6279 ap2 = ba + bi[row]; 6280 rmax2 = bimax[row]; 6281 nrow2 = bilen[row]; 6282 low2 = 0; 6283 high2 = nrow2; 6284 6285 for (j=0; j<n; j++) { 6286 if (roworiented) value = v[i*n+j]; 6287 else value = v[i+j*m]; 6288 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 6289 if (in[j] >= cstart && in[j] < cend) { 6290 col = in[j] - cstart; 6291 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 6292 #if defined(PETSC_HAVE_DEVICE) 6293 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 6294 #endif 6295 } else if (in[j] < 0) continue; 6296 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 6297 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */ 6298 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 6299 } else { 6300 if (mat->was_assembled) { 6301 if (!aij->colmap) { 6302 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 6303 } 6304 #if defined(PETSC_USE_CTABLE) 6305 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 6306 col--; 6307 #else 6308 col = aij->colmap[in[j]] - 1; 6309 #endif 6310 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 6311 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 6312 col = in[j]; 6313 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 6314 B = aij->B; 6315 b = (Mat_SeqAIJ*)B->data; 6316 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 6317 rp2 = bj + bi[row]; 6318 ap2 = ba + bi[row]; 6319 rmax2 = bimax[row]; 6320 nrow2 = bilen[row]; 6321 low2 = 
0; 6322 high2 = nrow2; 6323 bm = aij->B->rmap->n; 6324 ba = b->a; 6325 inserted = PETSC_FALSE; 6326 } 6327 } else col = in[j]; 6328 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 6329 #if defined(PETSC_HAVE_DEVICE) 6330 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 6331 #endif 6332 } 6333 } 6334 } else if (!aij->donotstash) { 6335 if (roworiented) { 6336 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6337 } else { 6338 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6339 } 6340 } 6341 } 6342 } 6343 PetscFunctionReturnVoid(); 6344 } 6345 6346 typedef struct { 6347 Mat *mp; /* intermediate products */ 6348 PetscBool *mptmp; /* is the intermediate product temporary ? */ 6349 PetscInt cp; /* number of intermediate products */ 6350 6351 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6352 PetscInt *startsj_s,*startsj_r; 6353 PetscScalar *bufa; 6354 Mat P_oth; 6355 6356 /* may take advantage of merging product->B */ 6357 Mat Bloc; 6358 6359 /* cusparse does not have support to split between symbolic and numeric phases 6360 When api_user is true, we don't need to update the numerical values 6361 of the temporary storage */ 6362 PetscBool reusesym; 6363 6364 /* support for COO values insertion */ 6365 PetscScalar *coo_v,*coo_w; 6366 PetscInt **own; 6367 PetscInt **off; 6368 PetscBool hasoffproc; /* if true, non-local values insertion (i.e. AtB or PtAP) */ 6369 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6370 PetscMemType mtype; 6371 6372 /* customization */ 6373 PetscBool abmerge; 6374 PetscBool P_oth_bind; 6375 } MatMatMPIAIJBACKEND; 6376 6377 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6378 { 6379 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data; 6380 PetscInt i; 6381 PetscErrorCode ierr; 6382 6383 PetscFunctionBegin; 6384 ierr = PetscFree2(mmdata->startsj_s,mmdata->startsj_r);CHKERRQ(ierr); 6385 ierr = PetscFree(mmdata->bufa);CHKERRQ(ierr); 6386 ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v);CHKERRQ(ierr); 6387 ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w);CHKERRQ(ierr); 6388 ierr = MatDestroy(&mmdata->P_oth);CHKERRQ(ierr); 6389 ierr = MatDestroy(&mmdata->Bloc);CHKERRQ(ierr); 6390 ierr = PetscSFDestroy(&mmdata->sf);CHKERRQ(ierr); 6391 for (i = 0; i < mmdata->cp; i++) { 6392 ierr = MatDestroy(&mmdata->mp[i]);CHKERRQ(ierr); 6393 } 6394 ierr = PetscFree(mmdata->mp);CHKERRQ(ierr); 6395 ierr = PetscFree(mmdata->mptmp);CHKERRQ(ierr); 6396 ierr = PetscFree(mmdata->own[0]);CHKERRQ(ierr); 6397 ierr = PetscFree(mmdata->own);CHKERRQ(ierr); 6398 ierr = PetscFree(mmdata->off[0]);CHKERRQ(ierr); 6399 ierr = PetscFree(mmdata->off);CHKERRQ(ierr); 6400 ierr = PetscFree(mmdata);CHKERRQ(ierr); 6401 PetscFunctionReturn(0); 6402 } 6403 6404 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 6405 { 6406 PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]); 6407 PetscErrorCode ierr; 6408 6409 PetscFunctionBegin; 6410 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f);CHKERRQ(ierr); 6411 if (f) { 6412 ierr = (*f)(A,n,idx,v);CHKERRQ(ierr); 6413 } else { 6414 const PetscScalar *vv; 6415 6416 ierr = MatSeqAIJGetArrayRead(A,&vv);CHKERRQ(ierr); 6417 if (n && idx) { 6418 PetscScalar *w = v; 6419 const PetscInt *oi = idx; 
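        /* fallback path: no type-specific MatSeqAIJCopySubArray_C implementation was found, so gather
           the requested entries on the host straight from the SeqAIJ value array, i.e. v[k] = vv[idx[k]]
           in the loop below */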
6420 PetscInt j; 6421 6422 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 6423 } else { 6424 ierr = PetscArraycpy(v,vv,n);CHKERRQ(ierr); 6425 } 6426 ierr = MatSeqAIJRestoreArrayRead(A,&vv);CHKERRQ(ierr); 6427 } 6428 PetscFunctionReturn(0); 6429 } 6430 6431 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 6432 { 6433 MatMatMPIAIJBACKEND *mmdata; 6434 PetscInt i,n_d,n_o; 6435 PetscErrorCode ierr; 6436 6437 PetscFunctionBegin; 6438 MatCheckProduct(C,1); 6439 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 6440 mmdata = (MatMatMPIAIJBACKEND*)C->product->data; 6441 if (!mmdata->reusesym) { /* update temporary matrices */ 6442 if (mmdata->P_oth) { 6443 ierr = MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 6444 } 6445 if (mmdata->Bloc) { 6446 ierr = MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc);CHKERRQ(ierr); 6447 } 6448 } 6449 mmdata->reusesym = PETSC_FALSE; 6450 6451 for (i = 0; i < mmdata->cp; i++) { 6452 if (!mmdata->mp[i]->ops->productnumeric) SETERRQ1(PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]); 6453 ierr = (*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]);CHKERRQ(ierr); 6454 } 6455 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 6456 PetscInt noff = mmdata->off[i+1] - mmdata->off[i]; 6457 6458 if (mmdata->mptmp[i]) continue; 6459 if (noff) { 6460 PetscInt nown = mmdata->own[i+1] - mmdata->own[i]; 6461 6462 ierr = MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o);CHKERRQ(ierr); 6463 ierr = MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d);CHKERRQ(ierr); 6464 n_o += noff; 6465 n_d += nown; 6466 } else { 6467 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data; 6468 6469 ierr = MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d);CHKERRQ(ierr); 6470 n_d += mm->nz; 6471 } 6472 } 6473 if (mmdata->hasoffproc) { /* offprocess insertion */ 6474 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr); 6475 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr); 6476 } 6477 ierr = MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES);CHKERRQ(ierr); 6478 PetscFunctionReturn(0); 6479 } 6480 6481 /* Support for Pt * A, A * P, or Pt * A * P */ 6482 #define MAX_NUMBER_INTERMEDIATE 4 6483 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 6484 { 6485 Mat_Product *product = C->product; 6486 Mat A,P,mp[MAX_NUMBER_INTERMEDIATE]; 6487 Mat_MPIAIJ *a,*p; 6488 MatMatMPIAIJBACKEND *mmdata; 6489 ISLocalToGlobalMapping P_oth_l2g = NULL; 6490 IS glob = NULL; 6491 const char *prefix; 6492 char pprefix[256]; 6493 const PetscInt *globidx,*P_oth_idx; 6494 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; 6495 PetscInt cp = 0,m,n,M,N,ncoo,ncoo_d,ncoo_o,ncoo_oown,*coo_i,*coo_j,cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE],i,j; 6496 MatProductType ptype; 6497 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk; 6498 PetscMPIInt size; 6499 PetscErrorCode ierr; 6500 6501 PetscFunctionBegin; 6502 MatCheckProduct(C,1); 6503 if (product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 6504 ptype = product->type; 6505 if (product->A->symmetric && ptype == MATPRODUCT_AtB) ptype = 
MATPRODUCT_AB; 6506 switch (ptype) { 6507 case MATPRODUCT_AB: 6508 A = product->A; 6509 P = product->B; 6510 m = A->rmap->n; 6511 n = P->cmap->n; 6512 M = A->rmap->N; 6513 N = P->cmap->N; 6514 break; 6515 case MATPRODUCT_AtB: 6516 P = product->A; 6517 A = product->B; 6518 m = P->cmap->n; 6519 n = A->cmap->n; 6520 M = P->cmap->N; 6521 N = A->cmap->N; 6522 hasoffproc = PETSC_TRUE; 6523 break; 6524 case MATPRODUCT_PtAP: 6525 A = product->A; 6526 P = product->B; 6527 m = P->cmap->n; 6528 n = P->cmap->n; 6529 M = P->cmap->N; 6530 N = P->cmap->N; 6531 hasoffproc = PETSC_TRUE; 6532 break; 6533 default: 6534 SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 6535 } 6536 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)C),&size);CHKERRMPI(ierr); 6537 if (size == 1) hasoffproc = PETSC_FALSE; 6538 6539 /* defaults */ 6540 for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) { 6541 mp[i] = NULL; 6542 mptmp[i] = PETSC_FALSE; 6543 rmapt[i] = -1; 6544 cmapt[i] = -1; 6545 rmapa[i] = NULL; 6546 cmapa[i] = NULL; 6547 } 6548 6549 /* customization */ 6550 ierr = PetscNew(&mmdata);CHKERRQ(ierr); 6551 mmdata->reusesym = product->api_user; 6552 if (ptype == MATPRODUCT_AB) { 6553 if (product->api_user) { 6554 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr); 6555 ierr = PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr); 6556 ierr = PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6557 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6558 } else { 6559 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr); 6560 ierr = PetscOptionsBool("-matproduct_ab_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr); 6561 ierr = PetscOptionsBool("-matproduct_ab_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6562 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6563 } 6564 } else if (ptype == MATPRODUCT_PtAP) { 6565 if (product->api_user) { 6566 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr); 6567 ierr = PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6568 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6569 } else { 6570 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr); 6571 ierr = PetscOptionsBool("-matproduct_ptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6572 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6573 } 6574 } 6575 a = (Mat_MPIAIJ*)A->data; 6576 p = (Mat_MPIAIJ*)P->data; 6577 ierr = MatSetSizes(C,m,n,M,N);CHKERRQ(ierr); 6578 ierr = PetscLayoutSetUp(C->rmap);CHKERRQ(ierr); 6579 ierr = PetscLayoutSetUp(C->cmap);CHKERRQ(ierr); 6580 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 6581 ierr = MatGetOptionsPrefix(C,&prefix);CHKERRQ(ierr); 6582 switch (ptype) { 6583 case MATPRODUCT_AB: /* A * P */ 6584 ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 6585 6586 if 
(mmdata->abmerge) { /* A_diag * P_loc and A_off * P_oth */ 6587 /* P is product->B */ 6588 ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 6589 ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6590 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6591 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6592 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6593 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6594 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6595 mp[cp]->product->api_user = product->api_user; 6596 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6597 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6598 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6599 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6600 rmapt[cp] = 1; 6601 cmapt[cp] = 2; 6602 cmapa[cp] = globidx; 6603 mptmp[cp] = PETSC_FALSE; 6604 cp++; 6605 } else { /* A_diag * P_diag and A_diag * P_off and A_off * P_oth */ 6606 ierr = MatProductCreate(a->A,p->A,NULL,&mp[cp]);CHKERRQ(ierr); 6607 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6608 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6609 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6610 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6611 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6612 mp[cp]->product->api_user = product->api_user; 6613 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6614 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6615 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6616 rmapt[cp] = 1; 6617 cmapt[cp] = 1; 6618 mptmp[cp] = PETSC_FALSE; 6619 cp++; 6620 ierr = MatProductCreate(a->A,p->B,NULL,&mp[cp]);CHKERRQ(ierr); 6621 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6622 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6623 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6624 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6625 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6626 mp[cp]->product->api_user = product->api_user; 6627 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6628 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6629 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6630 rmapt[cp] = 1; 6631 cmapt[cp] = 2; 6632 cmapa[cp] = p->garray; 6633 mptmp[cp] = PETSC_FALSE; 6634 cp++; 6635 } 6636 if (mmdata->P_oth) { 6637 ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); 6638 ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 6639 ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr); 6640 ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr); 6641 ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr); 6642 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6643 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6644 ierr = 
PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6645 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6646 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6647 mp[cp]->product->api_user = product->api_user; 6648 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6649 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6650 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6651 rmapt[cp] = 1; 6652 cmapt[cp] = 2; 6653 cmapa[cp] = P_oth_idx; 6654 mptmp[cp] = PETSC_FALSE; 6655 cp++; 6656 } 6657 break; 6658 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 6659 /* A is product->B */ 6660 ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 6661 if (A == P) { 6662 ierr = MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6663 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6664 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6665 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6666 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6667 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6668 mp[cp]->product->api_user = product->api_user; 6669 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6670 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6671 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6672 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6673 rmapt[cp] = 2; 6674 rmapa[cp] = globidx; 6675 cmapt[cp] = 2; 6676 cmapa[cp] = globidx; 6677 mptmp[cp] = PETSC_FALSE; 6678 cp++; 6679 } else { 6680 ierr = MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6681 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6682 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6683 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6684 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6685 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6686 mp[cp]->product->api_user = product->api_user; 6687 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6688 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6689 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6690 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6691 rmapt[cp] = 1; 6692 cmapt[cp] = 2; 6693 cmapa[cp] = globidx; 6694 mptmp[cp] = PETSC_FALSE; 6695 cp++; 6696 ierr = MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6697 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6698 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6699 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6700 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6701 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6702 mp[cp]->product->api_user = product->api_user; 6703 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6704 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6705 ierr = 
(*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6706 rmapt[cp] = 2; 6707 rmapa[cp] = p->garray; 6708 cmapt[cp] = 2; 6709 cmapa[cp] = globidx; 6710 mptmp[cp] = PETSC_FALSE; 6711 cp++; 6712 } 6713 break; 6714 case MATPRODUCT_PtAP: 6715 ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 6716 /* P is product->B */ 6717 ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 6718 ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6719 ierr = MatProductSetType(mp[cp],MATPRODUCT_PtAP);CHKERRQ(ierr); 6720 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6721 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6722 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6723 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6724 mp[cp]->product->api_user = product->api_user; 6725 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6726 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6727 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6728 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6729 rmapt[cp] = 2; 6730 rmapa[cp] = globidx; 6731 cmapt[cp] = 2; 6732 cmapa[cp] = globidx; 6733 mptmp[cp] = PETSC_FALSE; 6734 cp++; 6735 if (mmdata->P_oth) { 6736 ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); 6737 ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 6738 ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr); 6739 ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr); 6740 ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr); 6741 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6742 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6743 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6744 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6745 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6746 mp[cp]->product->api_user = product->api_user; 6747 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6748 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6749 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6750 mptmp[cp] = PETSC_TRUE; 6751 cp++; 6752 ierr = MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]);CHKERRQ(ierr); 6753 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6754 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6755 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6756 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6757 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6758 mp[cp]->product->api_user = product->api_user; 6759 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6760 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6761 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6762 rmapt[cp] = 2; 6763 rmapa[cp] = globidx; 6764 cmapt[cp] = 2; 6765 cmapa[cp] = P_oth_idx; 6766 mptmp[cp] = PETSC_FALSE; 6767 cp++; 6768 } 
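    /* Summary of the PtAP decomposition built above: mp[0] = Ploc^T * Adiag * Ploc (Ploc = mmdata->Bloc,
       the merged local rows of P) and, when rows of P are needed from other processes, mp[1] = Aoff * P_oth
       (a temporary product, mptmp = PETSC_TRUE) feeding mp[2] = Ploc^T * (Aoff * P_oth). Only mp[0] and
       mp[2] are mapped into the COO pattern of C, using the row/column maps recorded in rmapa[]/cmapa[]. */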
6769 break; 6770 default: 6771 SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 6772 } 6773 /* sanity check */ 6774 if (size > 1) for (i = 0; i < cp; i++) if (rmapt[i] == 2 && !hasoffproc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %D",i); 6775 6776 ierr = PetscMalloc1(cp,&mmdata->mp);CHKERRQ(ierr); 6777 for (i = 0; i < cp; i++) mmdata->mp[i] = mp[i]; 6778 ierr = PetscMalloc1(cp,&mmdata->mptmp);CHKERRQ(ierr); 6779 for (i = 0; i < cp; i++) mmdata->mptmp[i] = mptmp[i]; 6780 mmdata->cp = cp; 6781 C->product->data = mmdata; 6782 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 6783 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 6784 6785 /* memory type */ 6786 mmdata->mtype = PETSC_MEMTYPE_HOST; 6787 ierr = PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");CHKERRQ(ierr); 6788 ierr = PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"");CHKERRQ(ierr); 6789 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 6790 // enable the line below MatSeqAIJCopySubArray_SeqAIJKokkos is implemented 6791 //else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_DEVICE; 6792 6793 /* prepare coo coordinates for values insertion */ 6794 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 6795 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 6796 if (mptmp[cp]) continue; 6797 if (rmapt[cp] == 2 && hasoffproc) { 6798 const PetscInt *rmap = rmapa[cp]; 6799 const PetscInt mr = mp[cp]->rmap->n; 6800 const PetscInt rs = C->rmap->rstart; 6801 const PetscInt re = C->rmap->rend; 6802 const PetscInt *ii = mm->i; 6803 for (i = 0; i < mr; i++) { 6804 const PetscInt gr = rmap[i]; 6805 const PetscInt nz = ii[i+1] - ii[i]; 6806 if (gr < rs || gr >= re) ncoo_o += nz; 6807 else ncoo_oown += nz; 6808 } 6809 } else ncoo_d += mm->nz; 6810 } 6811 ierr = PetscCalloc1(mmdata->cp+1,&mmdata->off);CHKERRQ(ierr); 6812 ierr = PetscCalloc1(mmdata->cp+1,&mmdata->own);CHKERRQ(ierr); 6813 if (hasoffproc) { /* handle offproc values insertion */ 6814 PetscSF msf; 6815 PetscInt ncoo2,*coo_i2,*coo_j2; 6816 6817 ierr = PetscMalloc1(ncoo_o,&mmdata->off[0]);CHKERRQ(ierr); 6818 ierr = PetscMalloc1(ncoo_oown,&mmdata->own[0]);CHKERRQ(ierr); 6819 ierr = PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j);CHKERRQ(ierr); 6820 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 6821 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 6822 PetscInt *idxoff = mmdata->off[cp]; 6823 PetscInt *idxown = mmdata->own[cp]; 6824 if (!mptmp[cp] && rmapt[cp] == 2) { 6825 const PetscInt *rmap = rmapa[cp]; 6826 const PetscInt *cmap = cmapa[cp]; 6827 const PetscInt *ii = mm->i; 6828 PetscInt *coi = coo_i + ncoo_o; 6829 PetscInt *coj = coo_j + ncoo_o; 6830 const PetscInt mr = mp[cp]->rmap->n; 6831 const PetscInt rs = C->rmap->rstart; 6832 const PetscInt re = C->rmap->rend; 6833 const PetscInt cs = C->cmap->rstart; 6834 for (i = 0; i < mr; i++) { 6835 const PetscInt *jj = mm->j + ii[i]; 6836 const PetscInt gr = rmap[i]; 6837 const PetscInt nz = ii[i+1] - ii[i]; 6838 if (gr < rs || gr >= re) { 6839 for (j = ii[i]; j < ii[i+1]; j++) { 6840 *coi++ = gr; 6841 *idxoff++ = j; 6842 } 6843 if (!cmapt[cp]) { /* already global */ 6844 for (j = 0; j < nz; j++) *coj++ = jj[j]; 6845 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 6846 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 6847 } else { /* offdiag */ 6848 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 6849 } 6850 ncoo_o += nz; 6851 } 
else { 6852 for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j; 6853 } 6854 } 6855 } 6856 mmdata->off[cp + 1] = idxoff; 6857 mmdata->own[cp + 1] = idxown; 6858 } 6859 6860 ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr); 6861 ierr = PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o,NULL,PETSC_OWN_POINTER,coo_i);CHKERRQ(ierr); 6862 ierr = PetscSFGetMultiSF(mmdata->sf,&msf);CHKERRQ(ierr); 6863 ierr = PetscSFGetGraph(msf,&ncoo2,NULL,NULL,NULL);CHKERRQ(ierr); 6864 ncoo = ncoo_d + ncoo_oown + ncoo2; 6865 ierr = PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2);CHKERRQ(ierr); 6866 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 6867 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 6868 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 6869 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 6870 ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr); 6871 ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w);CHKERRQ(ierr); 6872 coo_i = coo_i2; 6873 coo_j = coo_j2; 6874 } else { /* no offproc values insertion */ 6875 ncoo = ncoo_d; 6876 ierr = PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j);CHKERRQ(ierr); 6877 6878 ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr); 6879 ierr = PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER);CHKERRQ(ierr); 6880 ierr = PetscSFSetUp(mmdata->sf);CHKERRQ(ierr); 6881 } 6882 mmdata->hasoffproc = hasoffproc; 6883 6884 /* on-process indices */ 6885 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 6886 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 6887 PetscInt *coi = coo_i + ncoo_d; 6888 PetscInt *coj = coo_j + ncoo_d; 6889 const PetscInt *jj = mm->j; 6890 const PetscInt *ii = mm->i; 6891 const PetscInt *cmap = cmapa[cp]; 6892 const PetscInt *rmap = rmapa[cp]; 6893 const PetscInt mr = mp[cp]->rmap->n; 6894 const PetscInt rs = C->rmap->rstart; 6895 const PetscInt re = C->rmap->rend; 6896 const PetscInt cs = C->cmap->rstart; 6897 6898 if (mptmp[cp]) continue; 6899 if (rmapt[cp] == 1) { 6900 for (i = 0; i < mr; i++) { 6901 const PetscInt gr = i + rs; 6902 for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr; 6903 } 6904 /* columns coo */ 6905 if (!cmapt[cp]) { 6906 ierr = PetscArraycpy(coj,jj,mm->nz);CHKERRQ(ierr); 6907 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 6908 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; 6909 } else { /* offdiag */ 6910 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 6911 } 6912 ncoo_d += mm->nz; 6913 } else if (rmapt[cp] == 2) { 6914 for (i = 0; i < mr; i++) { 6915 const PetscInt *jj = mm->j + ii[i]; 6916 const PetscInt gr = rmap[i]; 6917 const PetscInt nz = ii[i+1] - ii[i]; 6918 if (gr >= rs && gr < re) { 6919 for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr; 6920 if (!cmapt[cp]) { /* already global */ 6921 for (j = 0; j < nz; j++) *coj++ = jj[j]; 6922 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 6923 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 6924 } else { /* offdiag */ 6925 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 6926 } 6927 ncoo_d += nz; 6928 } 6929 } 6930 } 6931 } 6932 if (glob) { 6933 ierr = ISRestoreIndices(glob,&globidx);CHKERRQ(ierr); 6934 } 6935 ierr = ISDestroy(&glob);CHKERRQ(ierr); 6936 if (P_oth_l2g) { 6937 ierr = ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 6938 } 
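  /* Final setup: release the P_oth local-to-global mapping, allocate coo_v in the memory type matching
     C's backend (PetscSFMalloc), and hand the assembled (i,j) pattern to MatSetPreallocationCOO().
     The numeric phase, MatProductNumeric_MPIAIJBACKEND(), later copies the values of the intermediate
     products into coo_v (and coo_w for off-process entries) and inserts them with MatSetValuesCOO(). */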
6939 ierr = ISLocalToGlobalMappingDestroy(&P_oth_l2g);CHKERRQ(ierr); 6940 ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v);CHKERRQ(ierr); 6941 6942 /* preallocate with COO data */ 6943 ierr = MatSetPreallocationCOO(C,ncoo,coo_i,coo_j);CHKERRQ(ierr); 6944 ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr); 6945 PetscFunctionReturn(0); 6946 } 6947 6948 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 6949 { 6950 Mat_Product *product = mat->product; 6951 PetscErrorCode ierr; 6952 #if defined(PETSC_HAVE_DEVICE) 6953 PetscBool match = PETSC_FALSE; 6954 PetscBool usecpu = PETSC_FALSE; 6955 #else 6956 PetscBool match = PETSC_TRUE; 6957 #endif 6958 6959 PetscFunctionBegin; 6960 MatCheckProduct(mat,1); 6961 #if defined(PETSC_HAVE_DEVICE) 6962 if (!product->A->boundtocpu && !product->B->boundtocpu) { 6963 ierr = PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match);CHKERRQ(ierr); 6964 } 6965 if (match) { /* we can always fallback to the CPU if requested */ 6966 switch (product->type) { 6967 case MATPRODUCT_AB: 6968 if (product->api_user) { 6969 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");CHKERRQ(ierr); 6970 ierr = PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 6971 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6972 } else { 6973 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr); 6974 ierr = PetscOptionsBool("-matproduct_ab_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 6975 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6976 } 6977 break; 6978 case MATPRODUCT_AtB: 6979 if (product->api_user) { 6980 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr); 6981 ierr = PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 6982 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6983 } else { 6984 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr); 6985 ierr = PetscOptionsBool("-matproduct_atb_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 6986 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6987 } 6988 break; 6989 case MATPRODUCT_PtAP: 6990 if (product->api_user) { 6991 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");CHKERRQ(ierr); 6992 ierr = PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr); 6993 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6994 } else { 6995 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr); 6996 ierr = PetscOptionsBool("-matproduct_ptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr); 6997 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6998 } 6999 break; 7000 default: 7001 break; 7002 } 7003 match = (PetscBool)!usecpu; 7004 } 7005 #endif 7006 if (match) { 7007 switch (product->type) { 7008 case MATPRODUCT_AB: 7009 case MATPRODUCT_AtB: 7010 case MATPRODUCT_PtAP: 7011 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7012 break; 7013 default: 7014 break; 7015 } 7016 } 7017 /* fallback to MPIAIJ ops */ 7018 if (!mat->ops->productsymbolic) { 7019 ierr = 
MatProductSetFromOptions_MPIAIJ(mat);CHKERRQ(ierr); 7020 } 7021 PetscFunctionReturn(0); 7022 } 7023
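
/*
   Illustrative sketch (not part of the library code above): the two usual ways user code reaches the
   MatProduct machinery implemented in this file. A and P are assumed to be already assembled, compatibly
   sized MATMPIAIJ matrices (or device subclasses such as MATMPIAIJCUSPARSE/MATMPIAIJKOKKOS, for which
   MatProductSetFromOptions_MPIAIJBACKEND is intended); ierr is a PetscErrorCode declared by the caller.

     Mat C;

     (a) one-shot convenience interface (product->api_user is PETSC_TRUE in the code above)

     ierr = MatPtAP(A,P,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);

     (b) equivalent explicit MatProduct interface (product->api_user is PETSC_FALSE)

     ierr = MatProductCreate(A,P,NULL,&C);CHKERRQ(ierr);
     ierr = MatProductSetType(C,MATPRODUCT_PtAP);CHKERRQ(ierr);
     ierr = MatProductSetFromOptions(C);CHKERRQ(ierr);
     ierr = MatProductSymbolic(C);CHKERRQ(ierr);
     ierr = MatProductNumeric(C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);

   Runtime options handled above include -matptap_backend_cpu / -matproduct_ptap_backend_cpu (fall back
   to the CPU implementation) and -matptap_backend_pothbind / -matproduct_ptap_backend_pothbind
   (bind P_oth to the CPU).
*/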