1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 /*MC 10 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 11 12 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 13 and MATMPIAIJ otherwise. As a result, for single process communicators, 14 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation() is supported 15 for communicators controlling multiple processes. It is recommended that you call both of 16 the above preallocation routines for simplicity. 17 18 Options Database Keys: 19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 20 21 Developer Notes: 22 Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 23 enough exist. 24 25 Level: beginner 26 27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 28 M*/ 29 30 /*MC 31 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 32 33 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 34 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 35 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 36 for communicators controlling multiple processes. It is recommended that you call both of 37 the above preallocation routines for simplicity. 38 39 Options Database Keys: 40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 41 42 Level: beginner 43 44 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 45 M*/ 46 47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg) 48 { 49 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 50 PetscErrorCode ierr; 51 52 PetscFunctionBegin; 53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL) 54 A->boundtocpu = flg; 55 #endif 56 if (a->A) { 57 ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr); 58 } 59 if (a->B) { 60 ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr); 61 } 62 PetscFunctionReturn(0); 63 } 64 65 66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 67 { 68 PetscErrorCode ierr; 69 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 70 71 PetscFunctionBegin; 72 if (mat->A) { 73 ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr); 74 ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr); 75 } 76 PetscFunctionReturn(0); 77 } 78 79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 80 { 81 PetscErrorCode ierr; 82 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 83 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 84 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 85 const PetscInt *ia,*ib; 86 const MatScalar *aa,*bb,*aav,*bav; 87 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 88 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 89 90 PetscFunctionBegin; 91 *keptrows = NULL; 92 93 ia = a->i; 94 ib = b->i; 95 ierr = MatSeqAIJGetArrayRead(mat->A,&aav);CHKERRQ(ierr); 96 ierr = MatSeqAIJGetArrayRead(mat->B,&bav);CHKERRQ(ierr); 97 for (i=0; i<m; i++) { 98 na = ia[i+1] - ia[i]; 99 nb = ib[i+1] - ib[i]; 100 if (!na && !nb) { 101 cnt++; 102 goto ok1; 103 } 104 aa = aav + ia[i]; 105 for (j=0; j<na; j++) { 106 if (aa[j] != 0.0) goto ok1; 107 } 108 bb = bav + ib[i]; 109 for (j=0; 
j <nb; j++) { 110 if (bb[j] != 0.0) goto ok1; 111 } 112 cnt++; 113 ok1:; 114 } 115 ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 116 if (!n0rows) { 117 ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr); 118 ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr); 119 PetscFunctionReturn(0); 120 } 121 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 122 cnt = 0; 123 for (i=0; i<m; i++) { 124 na = ia[i+1] - ia[i]; 125 nb = ib[i+1] - ib[i]; 126 if (!na && !nb) continue; 127 aa = aav + ia[i]; 128 for (j=0; j<na;j++) { 129 if (aa[j] != 0.0) { 130 rows[cnt++] = rstart + i; 131 goto ok2; 132 } 133 } 134 bb = bav + ib[i]; 135 for (j=0; j<nb; j++) { 136 if (bb[j] != 0.0) { 137 rows[cnt++] = rstart + i; 138 goto ok2; 139 } 140 } 141 ok2:; 142 } 143 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 144 ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr); 145 ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr); 146 PetscFunctionReturn(0); 147 } 148 149 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 150 { 151 PetscErrorCode ierr; 152 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 153 PetscBool cong; 154 155 PetscFunctionBegin; 156 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 157 if (Y->assembled && cong) { 158 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 159 } else { 160 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 161 } 162 PetscFunctionReturn(0); 163 } 164 165 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 166 { 167 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 168 PetscErrorCode ierr; 169 PetscInt i,rstart,nrows,*rows; 170 171 PetscFunctionBegin; 172 *zrows = NULL; 173 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 174 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 175 for (i=0; i<nrows; i++) rows[i] += rstart; 176 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 177 PetscFunctionReturn(0); 178 } 179 180 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 181 { 182 PetscErrorCode ierr; 183 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 184 PetscInt i,n,*garray = aij->garray; 185 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 186 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 187 PetscReal *work; 188 const PetscScalar *dummy; 189 190 PetscFunctionBegin; 191 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 192 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 193 ierr = MatSeqAIJGetArrayRead(aij->A,&dummy);CHKERRQ(ierr); 194 ierr = MatSeqAIJRestoreArrayRead(aij->A,&dummy);CHKERRQ(ierr); 195 ierr = MatSeqAIJGetArrayRead(aij->B,&dummy);CHKERRQ(ierr); 196 ierr = MatSeqAIJRestoreArrayRead(aij->B,&dummy);CHKERRQ(ierr); 197 if (type == NORM_2) { 198 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 199 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 200 } 201 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 202 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 203 } 204 } else if (type == NORM_1) { 205 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 206 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 207 } 208 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 209 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 210 } 211 } else if (type == NORM_INFINITY) { 212 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 213 work[A->cmap->rstart + a_aij->j[i]] 
= PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 214 } 215 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 216 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 217 } 218 219 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 220 if (type == NORM_INFINITY) { 221 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 222 } else { 223 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 224 } 225 ierr = PetscFree(work);CHKERRQ(ierr); 226 if (type == NORM_2) { 227 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 228 } 229 PetscFunctionReturn(0); 230 } 231 232 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 233 { 234 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 235 IS sis,gis; 236 PetscErrorCode ierr; 237 const PetscInt *isis,*igis; 238 PetscInt n,*iis,nsis,ngis,rstart,i; 239 240 PetscFunctionBegin; 241 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 242 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 243 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 244 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 245 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 246 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 247 248 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 249 ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr); 250 ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr); 251 n = ngis + nsis; 252 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 253 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 254 for (i=0; i<n; i++) iis[i] += rstart; 255 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 256 257 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 258 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 259 ierr = ISDestroy(&sis);CHKERRQ(ierr); 260 ierr = ISDestroy(&gis);CHKERRQ(ierr); 261 PetscFunctionReturn(0); 262 } 263 264 /* 265 Local utility routine that creates a mapping from the global column 266 number to the local number in the off-diagonal part of the local 267 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 268 a slightly higher hash table cost; without it it is not scalable (each processor 269 has an order N integer array but is fast to access. 
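   For example (illustrative values only): if garray = {3,7,12} lists the global column numbers of the
   three off-diagonal columns stored locally, the loop below records colmap[3] = 1, colmap[7] = 2,
   colmap[12] = 3; the +1 shift lets a stored value of 0 mean "this global column is not present locally".
   With PETSC_USE_CTABLE the same global-to-local pairs are kept in a hash table instead of a dense
   integer array of length mat->cmap->N+1.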
270 */ 271 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 272 { 273 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 274 PetscErrorCode ierr; 275 PetscInt n = aij->B->cmap->n,i; 276 277 PetscFunctionBegin; 278 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 279 #if defined(PETSC_USE_CTABLE) 280 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 281 for (i=0; i<n; i++) { 282 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 283 } 284 #else 285 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 286 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 287 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 288 #endif 289 PetscFunctionReturn(0); 290 } 291 292 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 293 { \ 294 if (col <= lastcol1) low1 = 0; \ 295 else high1 = nrow1; \ 296 lastcol1 = col;\ 297 while (high1-low1 > 5) { \ 298 t = (low1+high1)/2; \ 299 if (rp1[t] > col) high1 = t; \ 300 else low1 = t; \ 301 } \ 302 for (_i=low1; _i<high1; _i++) { \ 303 if (rp1[_i] > col) break; \ 304 if (rp1[_i] == col) { \ 305 if (addv == ADD_VALUES) { \ 306 ap1[_i] += value; \ 307 /* Not sure LogFlops will slow dow the code or not */ \ 308 (void)PetscLogFlops(1.0); \ 309 } \ 310 else ap1[_i] = value; \ 311 inserted = PETSC_TRUE; \ 312 goto a_noinsert; \ 313 } \ 314 } \ 315 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 316 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 317 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 318 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 319 N = nrow1++ - 1; a->nz++; high1++; \ 320 /* shift up all the later entries in this row */ \ 321 ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\ 322 ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\ 323 rp1[_i] = col; \ 324 ap1[_i] = value; \ 325 A->nonzerostate++;\ 326 a_noinsert: ; \ 327 ailen[row] = nrow1; \ 328 } 329 330 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 331 { \ 332 if (col <= lastcol2) low2 = 0; \ 333 else high2 = nrow2; \ 334 lastcol2 = col; \ 335 while (high2-low2 > 5) { \ 336 t = (low2+high2)/2; \ 337 if (rp2[t] > col) high2 = t; \ 338 else low2 = t; \ 339 } \ 340 for (_i=low2; _i<high2; _i++) { \ 341 if (rp2[_i] > col) break; \ 342 if (rp2[_i] == col) { \ 343 if (addv == ADD_VALUES) { \ 344 ap2[_i] += value; \ 345 (void)PetscLogFlops(1.0); \ 346 } \ 347 else ap2[_i] = value; \ 348 inserted = PETSC_TRUE; \ 349 goto b_noinsert; \ 350 } \ 351 } \ 352 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 353 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 354 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 355 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 356 N = nrow2++ - 1; b->nz++; high2++; \ 357 /* shift up all the later entries in this row */ \ 358 ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\ 359 ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\ 360 rp2[_i] = col; \ 361 ap2[_i] = value; \ 362 B->nonzerostate++; \ 363 b_noinsert: ; \ 364 
bilen[row] = nrow2; \ 365 } 366 367 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 368 { 369 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 370 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 371 PetscErrorCode ierr; 372 PetscInt l,*garray = mat->garray,diag; 373 374 PetscFunctionBegin; 375 /* code only works for square matrices A */ 376 377 /* find size of row to the left of the diagonal part */ 378 ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr); 379 row = row - diag; 380 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 381 if (garray[b->j[b->i[row]+l]] > diag) break; 382 } 383 ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr); 384 385 /* diagonal part */ 386 ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr); 387 388 /* right of diagonal part */ 389 ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr); 390 #if defined(PETSC_HAVE_DEVICE) 391 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU; 392 #endif 393 PetscFunctionReturn(0); 394 } 395 396 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 397 { 398 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 399 PetscScalar value = 0.0; 400 PetscErrorCode ierr; 401 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 402 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 403 PetscBool roworiented = aij->roworiented; 404 405 /* Some Variables required in the macro */ 406 Mat A = aij->A; 407 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 408 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 409 PetscBool ignorezeroentries = a->ignorezeroentries; 410 Mat B = aij->B; 411 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 412 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 413 MatScalar *aa,*ba; 414 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 415 * cannot use "#if defined" inside a macro. 
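   The macros above set inserted = PETSC_TRUE when they update an existing stored entry, so the
   PETSC_HAVE_DEVICE blocks further down can mark the device copy as out of date (offload mask set to
   PETSC_OFFLOAD_CPU) only when the host data actually changed.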
*/ 416 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 417 418 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 419 PetscInt nonew; 420 MatScalar *ap1,*ap2; 421 422 PetscFunctionBegin; 423 #if defined(PETSC_HAVE_DEVICE) 424 if (A->offloadmask == PETSC_OFFLOAD_GPU) { 425 const PetscScalar *dummy; 426 ierr = MatSeqAIJGetArrayRead(A,&dummy);CHKERRQ(ierr); 427 ierr = MatSeqAIJRestoreArrayRead(A,&dummy);CHKERRQ(ierr); 428 } 429 if (B->offloadmask == PETSC_OFFLOAD_GPU) { 430 const PetscScalar *dummy; 431 ierr = MatSeqAIJGetArrayRead(B,&dummy);CHKERRQ(ierr); 432 ierr = MatSeqAIJRestoreArrayRead(B,&dummy);CHKERRQ(ierr); 433 } 434 #endif 435 aa = a->a; 436 ba = b->a; 437 for (i=0; i<m; i++) { 438 if (im[i] < 0) continue; 439 if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 440 if (im[i] >= rstart && im[i] < rend) { 441 row = im[i] - rstart; 442 lastcol1 = -1; 443 rp1 = aj + ai[row]; 444 ap1 = aa + ai[row]; 445 rmax1 = aimax[row]; 446 nrow1 = ailen[row]; 447 low1 = 0; 448 high1 = nrow1; 449 lastcol2 = -1; 450 rp2 = bj + bi[row]; 451 ap2 = ba + bi[row]; 452 rmax2 = bimax[row]; 453 nrow2 = bilen[row]; 454 low2 = 0; 455 high2 = nrow2; 456 457 for (j=0; j<n; j++) { 458 if (v) value = roworiented ? v[i*n+j] : v[i+j*m]; 459 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 460 if (in[j] >= cstart && in[j] < cend) { 461 col = in[j] - cstart; 462 nonew = a->nonew; 463 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 464 #if defined(PETSC_HAVE_DEVICE) 465 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 466 #endif 467 } else if (in[j] < 0) continue; 468 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 469 else { 470 if (mat->was_assembled) { 471 if (!aij->colmap) { 472 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 473 } 474 #if defined(PETSC_USE_CTABLE) 475 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 476 col--; 477 #else 478 col = aij->colmap[in[j]] - 1; 479 #endif 480 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 481 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 482 col = in[j]; 483 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 484 B = aij->B; 485 b = (Mat_SeqAIJ*)B->data; 486 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 487 rp2 = bj + bi[row]; 488 ap2 = ba + bi[row]; 489 rmax2 = bimax[row]; 490 nrow2 = bilen[row]; 491 low2 = 0; 492 high2 = nrow2; 493 bm = aij->B->rmap->n; 494 ba = b->a; 495 inserted = PETSC_FALSE; 496 } else if (col < 0) { 497 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 498 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 499 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 500 } 501 } else col = in[j]; 502 nonew = b->nonew; 503 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 504 #if defined(PETSC_HAVE_DEVICE) 505 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 506 #endif 507 } 508 } 509 } else { 510 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off 
process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 511 if (!aij->donotstash) { 512 mat->assembled = PETSC_FALSE; 513 if (roworiented) { 514 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 515 } else { 516 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 517 } 518 } 519 } 520 } 521 PetscFunctionReturn(0); 522 } 523 524 /* 525 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 526 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 527 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 528 */ 529 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[]) 530 { 531 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 532 Mat A = aij->A; /* diagonal part of the matrix */ 533 Mat B = aij->B; /* offdiagonal part of the matrix */ 534 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 535 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 536 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col; 537 PetscInt *ailen = a->ilen,*aj = a->j; 538 PetscInt *bilen = b->ilen,*bj = b->j; 539 PetscInt am = aij->A->rmap->n,j; 540 PetscInt diag_so_far = 0,dnz; 541 PetscInt offd_so_far = 0,onz; 542 543 PetscFunctionBegin; 544 /* Iterate over all rows of the matrix */ 545 for (j=0; j<am; j++) { 546 dnz = onz = 0; 547 /* Iterate over all non-zero columns of the current row */ 548 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 549 /* If column is in the diagonal */ 550 if (mat_j[col] >= cstart && mat_j[col] < cend) { 551 aj[diag_so_far++] = mat_j[col] - cstart; 552 dnz++; 553 } else { /* off-diagonal entries */ 554 bj[offd_so_far++] = mat_j[col]; 555 onz++; 556 } 557 } 558 ailen[j] = dnz; 559 bilen[j] = onz; 560 } 561 PetscFunctionReturn(0); 562 } 563 564 /* 565 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 566 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 567 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 568 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 569 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 570 */ 571 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[]) 572 { 573 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 574 Mat A = aij->A; /* diagonal part of the matrix */ 575 Mat B = aij->B; /* offdiagonal part of the matrix */ 576 Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data; 577 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 578 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 579 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 580 PetscInt *ailen = a->ilen,*aj = a->j; 581 PetscInt *bilen = b->ilen,*bj = b->j; 582 PetscInt am = aij->A->rmap->n,j; 583 PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. 
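   As an illustration of the diagonal/off-diagonal split performed by the loop below (illustrative
   values only): with cstart = 4 and cend = 8, a row whose sorted global columns are {1, 5, 6, 9}
   contributes local columns {1, 2} and the matching values to the diagonal-block arrays aj/aa, while
   columns {1, 9} keep their global numbering and go into the off-diagonal-block arrays bj/ba.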
*/ 584 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 585 PetscScalar *aa = a->a,*ba = b->a; 586 587 PetscFunctionBegin; 588 /* Iterate over all rows of the matrix */ 589 for (j=0; j<am; j++) { 590 dnz_row = onz_row = 0; 591 rowstart_offd = full_offd_i[j]; 592 rowstart_diag = full_diag_i[j]; 593 /* Iterate over all non-zero columns of the current row */ 594 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 595 /* If column is in the diagonal */ 596 if (mat_j[col] >= cstart && mat_j[col] < cend) { 597 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 598 aa[rowstart_diag+dnz_row] = mat_a[col]; 599 dnz_row++; 600 } else { /* off-diagonal entries */ 601 bj[rowstart_offd+onz_row] = mat_j[col]; 602 ba[rowstart_offd+onz_row] = mat_a[col]; 603 onz_row++; 604 } 605 } 606 ailen[j] = dnz_row; 607 bilen[j] = onz_row; 608 } 609 PetscFunctionReturn(0); 610 } 611 612 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 613 { 614 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 615 PetscErrorCode ierr; 616 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 617 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 618 619 PetscFunctionBegin; 620 for (i=0; i<m; i++) { 621 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 622 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 623 if (idxm[i] >= rstart && idxm[i] < rend) { 624 row = idxm[i] - rstart; 625 for (j=0; j<n; j++) { 626 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 627 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 628 if (idxn[j] >= cstart && idxn[j] < cend) { 629 col = idxn[j] - cstart; 630 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 631 } else { 632 if (!aij->colmap) { 633 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 634 } 635 #if defined(PETSC_USE_CTABLE) 636 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 637 col--; 638 #else 639 col = aij->colmap[idxn[j]] - 1; 640 #endif 641 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 642 else { 643 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 644 } 645 } 646 } 647 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 648 } 649 PetscFunctionReturn(0); 650 } 651 652 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 653 { 654 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 655 PetscErrorCode ierr; 656 PetscInt nstash,reallocs; 657 658 PetscFunctionBegin; 659 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 660 661 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 662 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 663 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 664 PetscFunctionReturn(0); 665 } 666 667 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 668 { 669 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 670 PetscErrorCode ierr; 671 PetscMPIInt n; 672 PetscInt i,j,rstart,ncols,flg; 673 PetscInt *row,*col; 674 PetscBool other_disassembled; 675 PetscScalar *val; 676 677 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be 
reset in disassembly */ 678 679 PetscFunctionBegin; 680 if (!aij->donotstash && !mat->nooffprocentries) { 681 while (1) { 682 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 683 if (!flg) break; 684 685 for (i=0; i<n;) { 686 /* Now identify the consecutive vals belonging to the same row */ 687 for (j=i,rstart=row[j]; j<n; j++) { 688 if (row[j] != rstart) break; 689 } 690 if (j < n) ncols = j-i; 691 else ncols = n-i; 692 /* Now assemble all these values with a single function call */ 693 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 694 i = j; 695 } 696 } 697 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 698 } 699 #if defined(PETSC_HAVE_DEVICE) 700 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 701 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 702 if (mat->boundtocpu) { 703 ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr); 704 ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr); 705 } 706 #endif 707 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 708 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 709 710 /* determine if any processor has disassembled, if so we must 711 also disassemble ourself, in order that we may reassemble. */ 712 /* 713 if nonzero structure of submatrix B cannot change then we know that 714 no processor disassembled thus we can skip this stuff 715 */ 716 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 717 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 718 if (mat->was_assembled && !other_disassembled) { 719 #if defined(PETSC_HAVE_DEVICE) 720 aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */ 721 #endif 722 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 723 } 724 } 725 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 726 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 727 } 728 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 729 #if defined(PETSC_HAVE_DEVICE) 730 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 731 #endif 732 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 733 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 734 735 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 736 737 aij->rowvalues = NULL; 738 739 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 740 741 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 742 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 743 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 744 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 745 } 746 #if defined(PETSC_HAVE_DEVICE) 747 mat->offloadmask = PETSC_OFFLOAD_BOTH; 748 #endif 749 PetscFunctionReturn(0); 750 } 751 752 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 753 { 754 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 755 PetscErrorCode ierr; 756 757 PetscFunctionBegin; 758 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 759 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 760 PetscFunctionReturn(0); 761 } 762 763 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt 
rows[],PetscScalar diag,Vec x,Vec b) 764 { 765 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 766 PetscObjectState sA, sB; 767 PetscInt *lrows; 768 PetscInt r, len; 769 PetscBool cong, lch, gch; 770 PetscErrorCode ierr; 771 772 PetscFunctionBegin; 773 /* get locally owned rows */ 774 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 775 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 776 /* fix right hand side if needed */ 777 if (x && b) { 778 const PetscScalar *xx; 779 PetscScalar *bb; 780 781 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 782 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 783 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 784 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 785 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 786 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 787 } 788 789 sA = mat->A->nonzerostate; 790 sB = mat->B->nonzerostate; 791 792 if (diag != 0.0 && cong) { 793 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 794 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 795 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 796 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 797 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 798 PetscInt nnwA, nnwB; 799 PetscBool nnzA, nnzB; 800 801 nnwA = aijA->nonew; 802 nnwB = aijB->nonew; 803 nnzA = aijA->keepnonzeropattern; 804 nnzB = aijB->keepnonzeropattern; 805 if (!nnzA) { 806 ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr); 807 aijA->nonew = 0; 808 } 809 if (!nnzB) { 810 ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr); 811 aijB->nonew = 0; 812 } 813 /* Must zero here before the next loop */ 814 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 815 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 816 for (r = 0; r < len; ++r) { 817 const PetscInt row = lrows[r] + A->rmap->rstart; 818 if (row >= A->cmap->N) continue; 819 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 820 } 821 aijA->nonew = nnwA; 822 aijB->nonew = nnwB; 823 } else { 824 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 825 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 826 } 827 ierr = PetscFree(lrows);CHKERRQ(ierr); 828 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 829 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 830 831 /* reduce nonzerostate */ 832 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 833 ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 834 if (gch) A->nonzerostate++; 835 PetscFunctionReturn(0); 836 } 837 838 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 839 { 840 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 841 PetscErrorCode ierr; 842 PetscMPIInt n = A->rmap->n; 843 PetscInt i,j,r,m,len = 0; 844 PetscInt *lrows,*owners = A->rmap->range; 845 PetscMPIInt p = 0; 846 PetscSFNode *rrows; 847 PetscSF sf; 848 const PetscScalar *xx; 849 PetscScalar *bb,*mask; 850 Vec xmask,lmask; 851 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 852 const PetscInt *aj, 
*ii,*ridx; 853 PetscScalar *aa; 854 855 PetscFunctionBegin; 856 /* Create SF where leaves are input rows and roots are owned rows */ 857 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 858 for (r = 0; r < n; ++r) lrows[r] = -1; 859 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 860 for (r = 0; r < N; ++r) { 861 const PetscInt idx = rows[r]; 862 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 863 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 864 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 865 } 866 rrows[r].rank = p; 867 rrows[r].index = rows[r] - owners[p]; 868 } 869 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 870 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 871 /* Collect flags for rows to be zeroed */ 872 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 873 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 874 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 875 /* Compress and put in row numbers */ 876 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 877 /* zero diagonal part of matrix */ 878 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 879 /* handle off diagonal part of matrix */ 880 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 881 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 882 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 883 for (i=0; i<len; i++) bb[lrows[i]] = 1; 884 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 885 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 886 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 887 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 888 if (x && b) { /* this code is buggy when the row and column layout don't match */ 889 PetscBool cong; 890 891 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 892 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 893 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 894 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 895 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 896 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 897 } 898 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 899 /* remove zeroed rows of off diagonal matrix */ 900 ii = aij->i; 901 for (i=0; i<len; i++) { 902 ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr); 903 } 904 /* loop over all elements of off process part of matrix zeroing removed columns*/ 905 if (aij->compressedrow.use) { 906 m = aij->compressedrow.nrows; 907 ii = aij->compressedrow.i; 908 ridx = aij->compressedrow.rindex; 909 for (i=0; i<m; i++) { 910 n = ii[i+1] - ii[i]; 911 aj = aij->j + ii[i]; 912 aa = aij->a + ii[i]; 913 914 for (j=0; j<n; j++) { 915 if (PetscAbsScalar(mask[*aj])) { 916 if (b) bb[*ridx] -= *aa*xx[*aj]; 917 *aa = 0.0; 918 } 919 aa++; 920 aj++; 921 } 922 ridx++; 923 } 924 } else { /* do not use compressed row format */ 925 m = l->B->rmap->n; 926 for (i=0; i<m; i++) { 927 n = ii[i+1] - ii[i]; 928 aj = aij->j + ii[i]; 929 aa = aij->a + ii[i]; 930 for (j=0; j<n; j++) { 931 if (PetscAbsScalar(mask[*aj])) { 932 if (b) bb[i] -= *aa*xx[*aj]; 933 *aa = 0.0; 934 } 935 aa++; 936 aj++; 937 } 938 } 
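    /* at this point every local row of the off-diagonal block has been scanned: entries lying in
       zeroed columns were folded into the right-hand side (when x and b are provided) and then set to 0.0 */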
939 } 940 if (x && b) { 941 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 942 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 943 } 944 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 945 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 946 ierr = PetscFree(lrows);CHKERRQ(ierr); 947 948 /* only change matrix nonzero state if pattern was allowed to be changed */ 949 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 950 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 951 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 952 } 953 PetscFunctionReturn(0); 954 } 955 956 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 957 { 958 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 959 PetscErrorCode ierr; 960 PetscInt nt; 961 VecScatter Mvctx = a->Mvctx; 962 963 PetscFunctionBegin; 964 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 965 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 966 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 967 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 968 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 969 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 970 PetscFunctionReturn(0); 971 } 972 973 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 974 { 975 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 976 PetscErrorCode ierr; 977 978 PetscFunctionBegin; 979 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 980 PetscFunctionReturn(0); 981 } 982 983 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 984 { 985 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 986 PetscErrorCode ierr; 987 VecScatter Mvctx = a->Mvctx; 988 989 PetscFunctionBegin; 990 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 991 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 992 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 993 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 994 PetscFunctionReturn(0); 995 } 996 997 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 998 { 999 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1000 PetscErrorCode ierr; 1001 1002 PetscFunctionBegin; 1003 /* do nondiagonal part */ 1004 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1005 /* do local part */ 1006 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1007 /* add partial results together */ 1008 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1009 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1010 PetscFunctionReturn(0); 1011 } 1012 1013 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1014 { 1015 MPI_Comm comm; 1016 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1017 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1018 IS Me,Notme; 1019 PetscErrorCode ierr; 1020 PetscInt M,N,first,last,*notme,i; 1021 PetscBool lf; 1022 PetscMPIInt size; 1023 1024 PetscFunctionBegin; 1025 /* Easy test: symmetric diagonal block */ 1026 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1027 ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 1028 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr); 1029 if (!*f) PetscFunctionReturn(0); 1030 ierr = 
PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1031 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 1032 if (size == 1) PetscFunctionReturn(0); 1033 1034 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1035 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1036 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1037 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1038 for (i=0; i<first; i++) notme[i] = i; 1039 for (i=last; i<M; i++) notme[i-last+first] = i; 1040 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1041 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1042 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1043 Aoff = Aoffs[0]; 1044 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1045 Boff = Boffs[0]; 1046 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1047 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1048 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1049 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1050 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1051 ierr = PetscFree(notme);CHKERRQ(ierr); 1052 PetscFunctionReturn(0); 1053 } 1054 1055 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1056 { 1057 PetscErrorCode ierr; 1058 1059 PetscFunctionBegin; 1060 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1061 PetscFunctionReturn(0); 1062 } 1063 1064 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1065 { 1066 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1067 PetscErrorCode ierr; 1068 1069 PetscFunctionBegin; 1070 /* do nondiagonal part */ 1071 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1072 /* do local part */ 1073 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1074 /* add partial results together */ 1075 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1076 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1077 PetscFunctionReturn(0); 1078 } 1079 1080 /* 1081 This only works correctly for square matrices where the subblock A->A is the 1082 diagonal block 1083 */ 1084 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1085 { 1086 PetscErrorCode ierr; 1087 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1088 1089 PetscFunctionBegin; 1090 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1091 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1092 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1093 PetscFunctionReturn(0); 1094 } 1095 1096 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1097 { 1098 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1099 PetscErrorCode ierr; 1100 1101 PetscFunctionBegin; 1102 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1103 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1104 PetscFunctionReturn(0); 1105 } 1106 1107 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1108 { 1109 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1110 PetscErrorCode ierr; 1111 1112 PetscFunctionBegin; 1113 #if defined(PETSC_USE_LOG) 1114 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1115 #endif 1116 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1117 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1118 ierr = 
MatDestroy(&aij->A);CHKERRQ(ierr); 1119 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1120 #if defined(PETSC_USE_CTABLE) 1121 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1122 #else 1123 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1124 #endif 1125 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1126 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1127 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1128 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1129 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1130 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1131 1132 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 1133 ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr); 1134 1135 ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr); 1136 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1137 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1138 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1139 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1140 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1141 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1142 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1143 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr); 1144 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1145 #if defined(PETSC_HAVE_CUDA) 1146 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr); 1147 #endif 1148 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 1149 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr); 1150 #endif 1151 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 1152 #if defined(PETSC_HAVE_ELEMENTAL) 1153 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1154 #endif 1155 #if defined(PETSC_HAVE_SCALAPACK) 1156 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr); 1157 #endif 1158 #if defined(PETSC_HAVE_HYPRE) 1159 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1160 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1161 #endif 1162 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1163 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr); 1164 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1165 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr); 1166 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr); 1167 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr); 1168 #if defined(PETSC_HAVE_MKL_SPARSE) 1169 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr); 1170 #endif 1171 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr); 1172 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1173 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr); 1174 PetscFunctionReturn(0); 1175 } 1176 1177 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1178 { 1179 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1180 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1181 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1182 const PetscInt *garray = aij->garray; 1183 const PetscScalar *aa,*ba; 1184 PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb; 1185 PetscInt *rowlens; 1186 PetscInt *colidxs; 1187 PetscScalar *matvals; 1188 PetscErrorCode ierr; 1189 1190 PetscFunctionBegin; 1191 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1192 1193 M = mat->rmap->N; 1194 N = mat->cmap->N; 1195 m = mat->rmap->n; 1196 rs = mat->rmap->rstart; 1197 cs = mat->cmap->rstart; 1198 nz = A->nz + B->nz; 1199 1200 /* write matrix header */ 1201 header[0] = MAT_FILE_CLASSID; 1202 header[1] = M; header[2] = N; header[3] = nz; 1203 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1204 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1205 1206 /* fill in and store row lengths */ 1207 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1208 for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1209 ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr); 1210 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1211 1212 /* fill in and store column indices */ 1213 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1214 for (cnt=0, i=0; i<m; i++) { 1215 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1216 if (garray[B->j[jb]] > cs) break; 1217 colidxs[cnt++] = garray[B->j[jb]]; 1218 } 1219 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1220 colidxs[cnt++] = A->j[ja] + cs; 1221 for (; jb<B->i[i+1]; jb++) 1222 colidxs[cnt++] = garray[B->j[jb]]; 1223 } 1224 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1225 ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 1226 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1227 1228 /* fill in and store nonzero values */ 1229 ierr = MatSeqAIJGetArrayRead(aij->A,&aa);CHKERRQ(ierr); 1230 ierr = MatSeqAIJGetArrayRead(aij->B,&ba);CHKERRQ(ierr); 1231 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1232 for (cnt=0, i=0; i<m; i++) { 1233 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1234 if (garray[B->j[jb]] > cs) break; 1235 matvals[cnt++] = ba[jb]; 1236 } 1237 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1238 matvals[cnt++] = aa[ja]; 1239 for (; jb<B->i[i+1]; jb++) 1240 matvals[cnt++] = ba[jb]; 1241 } 1242 ierr = MatSeqAIJRestoreArrayRead(aij->A,&aa);CHKERRQ(ierr); 1243 ierr = MatSeqAIJRestoreArrayRead(aij->B,&ba);CHKERRQ(ierr); 1244 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1245 ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 1246 ierr = PetscFree(matvals);CHKERRQ(ierr); 1247 1248 /* write block size option to the viewer's .info file */ 1249 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1250 PetscFunctionReturn(0); 
1251 } 1252 1253 #include <petscdraw.h> 1254 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1255 { 1256 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1257 PetscErrorCode ierr; 1258 PetscMPIInt rank = aij->rank,size = aij->size; 1259 PetscBool isdraw,iascii,isbinary; 1260 PetscViewer sviewer; 1261 PetscViewerFormat format; 1262 1263 PetscFunctionBegin; 1264 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1265 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1266 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1267 if (iascii) { 1268 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1269 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1270 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1271 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1272 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1273 for (i=0; i<(PetscInt)size; i++) { 1274 nmax = PetscMax(nmax,nz[i]); 1275 nmin = PetscMin(nmin,nz[i]); 1276 navg += nz[i]; 1277 } 1278 ierr = PetscFree(nz);CHKERRQ(ierr); 1279 navg = navg/size; 1280 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1281 PetscFunctionReturn(0); 1282 } 1283 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1284 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1285 MatInfo info; 1286 PetscBool inodes; 1287 1288 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr); 1289 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1290 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1291 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1292 if (!inodes) { 1293 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1294 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1295 } else { 1296 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1297 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1298 } 1299 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1300 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1301 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1302 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1303 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1304 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1305 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1306 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1307 PetscFunctionReturn(0); 1308 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1309 PetscInt inodecount,inodelimit,*inodes; 1310 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1311 if (inodes) { 1312 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1313 } else { 1314 ierr = 
PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1315 } 1316 PetscFunctionReturn(0); 1317 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1318 PetscFunctionReturn(0); 1319 } 1320 } else if (isbinary) { 1321 if (size == 1) { 1322 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1323 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1324 } else { 1325 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1326 } 1327 PetscFunctionReturn(0); 1328 } else if (iascii && size == 1) { 1329 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1330 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1331 PetscFunctionReturn(0); 1332 } else if (isdraw) { 1333 PetscDraw draw; 1334 PetscBool isnull; 1335 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1336 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1337 if (isnull) PetscFunctionReturn(0); 1338 } 1339 1340 { /* assemble the entire matrix onto first processor */ 1341 Mat A = NULL, Av; 1342 IS isrow,iscol; 1343 1344 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1345 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1346 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1347 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1348 /* The commented code uses MatCreateSubMatrices instead */ 1349 /* 1350 Mat *AA, A = NULL, Av; 1351 IS isrow,iscol; 1352 1353 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1354 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1355 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1356 if (!rank) { 1357 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1358 A = AA[0]; 1359 Av = AA[0]; 1360 } 1361 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1362 */ 1363 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1364 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1365 /* 1366 Everyone has to call to draw the matrix since the graphics waits are 1367 synchronized across all processors that share the PetscDraw object 1368 */ 1369 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1370 if (!rank) { 1371 if (((PetscObject)mat)->name) { 1372 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1373 } 1374 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1375 } 1376 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1377 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1378 ierr = MatDestroy(&A);CHKERRQ(ierr); 1379 } 1380 PetscFunctionReturn(0); 1381 } 1382 1383 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1384 { 1385 PetscErrorCode ierr; 1386 PetscBool iascii,isdraw,issocket,isbinary; 1387 1388 PetscFunctionBegin; 1389 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1390 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1391 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1392 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1393 if (iascii || isdraw || isbinary || issocket) { 1394 ierr = 
MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1395 } 1396 PetscFunctionReturn(0); 1397 } 1398 1399 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1400 { 1401 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1402 PetscErrorCode ierr; 1403 Vec bb1 = NULL; 1404 PetscBool hasop; 1405 1406 PetscFunctionBegin; 1407 if (flag == SOR_APPLY_UPPER) { 1408 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1409 PetscFunctionReturn(0); 1410 } 1411 1412 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1413 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1414 } 1415 1416 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1417 if (flag & SOR_ZERO_INITIAL_GUESS) { 1418 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1419 its--; 1420 } 1421 1422 while (its--) { 1423 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1424 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1425 1426 /* update rhs: bb1 = bb - B*x */ 1427 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1428 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1429 1430 /* local sweep */ 1431 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1432 } 1433 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1434 if (flag & SOR_ZERO_INITIAL_GUESS) { 1435 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1436 its--; 1437 } 1438 while (its--) { 1439 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1440 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1441 1442 /* update rhs: bb1 = bb - B*x */ 1443 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1444 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1445 1446 /* local sweep */ 1447 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1448 } 1449 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1450 if (flag & SOR_ZERO_INITIAL_GUESS) { 1451 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1452 its--; 1453 } 1454 while (its--) { 1455 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1456 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1457 1458 /* update rhs: bb1 = bb - B*x */ 1459 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1460 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1461 1462 /* local sweep */ 1463 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1464 } 1465 } else if (flag & SOR_EISENSTAT) { 1466 Vec xx1; 1467 1468 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1469 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1470 1471 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1472 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1473 if (!mat->diag) { 1474 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1475 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1476 } 1477 ierr = 
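/*
   Each local sweep above moves the off-process coupling to the right-hand side: the current iterate is
   scattered into lvec, bb1 = bb - B*lvec is formed, and a sequential SOR sweep is applied to the diagonal
   block A only (processor-local SOR). A minimal sketch of requesting this behaviour through a KSP/PC,
   where "ksp" is a placeholder for an existing solver object:

     PC pc;
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
     ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);
*/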
MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1478 if (hasop) { 1479 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1480 } else { 1481 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1482 } 1483 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1484 1485 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1486 1487 /* local sweep */ 1488 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1489 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1490 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1491 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1492 1493 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1494 1495 matin->factorerrortype = mat->A->factorerrortype; 1496 PetscFunctionReturn(0); 1497 } 1498 1499 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1500 { 1501 Mat aA,aB,Aperm; 1502 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1503 PetscScalar *aa,*ba; 1504 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1505 PetscSF rowsf,sf; 1506 IS parcolp = NULL; 1507 PetscBool done; 1508 PetscErrorCode ierr; 1509 1510 PetscFunctionBegin; 1511 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1512 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1513 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1514 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1515 1516 /* Invert row permutation to find out where my rows should go */ 1517 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1518 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1519 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1520 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1521 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1522 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1523 1524 /* Invert column permutation to find out where my columns should go */ 1525 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1526 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1527 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1528 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1529 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1530 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1531 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1532 1533 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1534 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1535 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1536 1537 /* Find out where my gcols should go */ 1538 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1539 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1540 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1541 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1542 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1543 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr); 1544 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr); 1545 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1546 1547 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1548 ierr = 
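/*
   The two star forests above invert the permutations: for each position of the permuted matrix, its new
   global index is reduced onto the owner of the original row (column) it takes its values from, so after
   the reduce rdest[i] and cdest[i] hold the destination global row and column of local row/column i. A
   minimal usage sketch of the public interface, assuming index sets "rowperm" and "colperm" describe the
   permutation:

     Mat Aperm;
     ierr = MatPermute(A,rowperm,colperm,&Aperm);CHKERRQ(ierr);
     ierr = MatDestroy(&Aperm);CHKERRQ(ierr);
*/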
MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1549 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1550 for (i=0; i<m; i++) { 1551 PetscInt row = rdest[i]; 1552 PetscMPIInt rowner; 1553 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1554 for (j=ai[i]; j<ai[i+1]; j++) { 1555 PetscInt col = cdest[aj[j]]; 1556 PetscMPIInt cowner; 1557 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1558 if (rowner == cowner) dnnz[i]++; 1559 else onnz[i]++; 1560 } 1561 for (j=bi[i]; j<bi[i+1]; j++) { 1562 PetscInt col = gcdest[bj[j]]; 1563 PetscMPIInt cowner; 1564 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1565 if (rowner == cowner) dnnz[i]++; 1566 else onnz[i]++; 1567 } 1568 } 1569 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr); 1570 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr); 1571 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr); 1572 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr); 1573 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1574 1575 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1576 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1577 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1578 for (i=0; i<m; i++) { 1579 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1580 PetscInt j0,rowlen; 1581 rowlen = ai[i+1] - ai[i]; 1582 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1583 for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1584 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1585 } 1586 rowlen = bi[i+1] - bi[i]; 1587 for (j0=j=0; j<rowlen; j0=j) { 1588 for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1589 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1590 } 1591 } 1592 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1593 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1594 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1595 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1596 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1597 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1598 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1599 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1600 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1601 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1602 *B = Aperm; 1603 PetscFunctionReturn(0); 1604 } 1605 1606 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1607 { 1608 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1609 PetscErrorCode ierr; 1610 1611 PetscFunctionBegin; 1612 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1613 if (ghosts) *ghosts = aij->garray; 1614 PetscFunctionReturn(0); 1615 } 1616 1617 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1618 { 1619 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1620 Mat A = mat->A,B = mat->B; 1621 PetscErrorCode ierr; 1622 PetscLogDouble isend[5],irecv[5]; 1623 1624 PetscFunctionBegin; 1625 info->block_size = 1.0; 1626 ierr = 
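/*
   MatGetGhosts_MPIAIJ() above simply exposes garray, the global indices of the off-process columns that
   this rank's off-diagonal block references (the same indices used to build lvec and Mvctx for the
   matrix-vector product). A minimal usage sketch with a placeholder matrix "A":

     PetscInt       nghost;
     const PetscInt *ghosts;
     ierr = MatGetGhosts(A,&nghost,&ghosts);CHKERRQ(ierr);
*/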
MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1627 1628 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1629 isend[3] = info->memory; isend[4] = info->mallocs; 1630 1631 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1632 1633 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1634 isend[3] += info->memory; isend[4] += info->mallocs; 1635 if (flag == MAT_LOCAL) { 1636 info->nz_used = isend[0]; 1637 info->nz_allocated = isend[1]; 1638 info->nz_unneeded = isend[2]; 1639 info->memory = isend[3]; 1640 info->mallocs = isend[4]; 1641 } else if (flag == MAT_GLOBAL_MAX) { 1642 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1643 1644 info->nz_used = irecv[0]; 1645 info->nz_allocated = irecv[1]; 1646 info->nz_unneeded = irecv[2]; 1647 info->memory = irecv[3]; 1648 info->mallocs = irecv[4]; 1649 } else if (flag == MAT_GLOBAL_SUM) { 1650 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1651 1652 info->nz_used = irecv[0]; 1653 info->nz_allocated = irecv[1]; 1654 info->nz_unneeded = irecv[2]; 1655 info->memory = irecv[3]; 1656 info->mallocs = irecv[4]; 1657 } 1658 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1659 info->fill_ratio_needed = 0; 1660 info->factor_mallocs = 0; 1661 PetscFunctionReturn(0); 1662 } 1663 1664 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1665 { 1666 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1667 PetscErrorCode ierr; 1668 1669 PetscFunctionBegin; 1670 switch (op) { 1671 case MAT_NEW_NONZERO_LOCATIONS: 1672 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1673 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1674 case MAT_KEEP_NONZERO_PATTERN: 1675 case MAT_NEW_NONZERO_LOCATION_ERR: 1676 case MAT_USE_INODES: 1677 case MAT_IGNORE_ZERO_ENTRIES: 1678 MatCheckPreallocated(A,1); 1679 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1680 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1681 break; 1682 case MAT_ROW_ORIENTED: 1683 MatCheckPreallocated(A,1); 1684 a->roworiented = flg; 1685 1686 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1687 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1688 break; 1689 case MAT_FORCE_DIAGONAL_ENTRIES: 1690 case MAT_SORTED_FULL: 1691 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1692 break; 1693 case MAT_IGNORE_OFF_PROC_ENTRIES: 1694 a->donotstash = flg; 1695 break; 1696 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1697 case MAT_SPD: 1698 case MAT_SYMMETRIC: 1699 case MAT_STRUCTURALLY_SYMMETRIC: 1700 case MAT_HERMITIAN: 1701 case MAT_SYMMETRY_ETERNAL: 1702 break; 1703 case MAT_SUBMAT_SINGLEIS: 1704 A->submat_singleis = flg; 1705 break; 1706 case MAT_STRUCTURE_ONLY: 1707 /* The option is handled directly by MatSetOption() */ 1708 break; 1709 default: 1710 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1711 } 1712 PetscFunctionReturn(0); 1713 } 1714 1715 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1716 { 1717 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1718 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1719 PetscErrorCode ierr; 1720 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1721 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1722 PetscInt *cmap,*idx_p; 1723 1724 PetscFunctionBegin; 1725 if (mat->getrowactive) 
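/*
   MatGetRow() on a MATMPIAIJ serves only locally owned rows (see the ownership check below) and merges the
   diagonal and off-diagonal parts into a single row sorted by global column. A minimal sketch of the
   standard traversal, with "A" a placeholder for an assembled matrix:

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/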
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1726 mat->getrowactive = PETSC_TRUE; 1727 1728 if (!mat->rowvalues && (idx || v)) { 1729 /* 1730 allocate enough space to hold information from the longest row. 1731 */ 1732 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1733 PetscInt max = 1,tmp; 1734 for (i=0; i<matin->rmap->n; i++) { 1735 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1736 if (max < tmp) max = tmp; 1737 } 1738 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1739 } 1740 1741 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1742 lrow = row - rstart; 1743 1744 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1745 if (!v) {pvA = NULL; pvB = NULL;} 1746 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1747 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1748 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1749 nztot = nzA + nzB; 1750 1751 cmap = mat->garray; 1752 if (v || idx) { 1753 if (nztot) { 1754 /* Sort by increasing column numbers, assuming A and B already sorted */ 1755 PetscInt imark = -1; 1756 if (v) { 1757 *v = v_p = mat->rowvalues; 1758 for (i=0; i<nzB; i++) { 1759 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1760 else break; 1761 } 1762 imark = i; 1763 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1764 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1765 } 1766 if (idx) { 1767 *idx = idx_p = mat->rowindices; 1768 if (imark > -1) { 1769 for (i=0; i<imark; i++) { 1770 idx_p[i] = cmap[cworkB[i]]; 1771 } 1772 } else { 1773 for (i=0; i<nzB; i++) { 1774 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1775 else break; 1776 } 1777 imark = i; 1778 } 1779 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1780 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1781 } 1782 } else { 1783 if (idx) *idx = NULL; 1784 if (v) *v = NULL; 1785 } 1786 } 1787 *nz = nztot; 1788 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1789 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1790 PetscFunctionReturn(0); 1791 } 1792 1793 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1794 { 1795 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1796 1797 PetscFunctionBegin; 1798 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1799 aij->getrowactive = PETSC_FALSE; 1800 PetscFunctionReturn(0); 1801 } 1802 1803 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1804 { 1805 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1806 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1807 PetscErrorCode ierr; 1808 PetscInt i,j,cstart = mat->cmap->rstart; 1809 PetscReal sum = 0.0; 1810 MatScalar *v; 1811 1812 PetscFunctionBegin; 1813 if (aij->size == 1) { 1814 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1815 } else { 1816 if (type == NORM_FROBENIUS) { 1817 v = amat->a; 1818 for (i=0; i<amat->nz; i++) { 1819 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1820 } 1821 v = bmat->a; 1822 for (i=0; i<bmat->nz; i++) { 1823 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1824 } 1825 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1826 *norm = PetscSqrtReal(*norm); 1827 ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr); 1828 } else if 
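/*
   The Frobenius branch above forms the sum of |a_ij|^2 over the local diagonal-block and off-diagonal-block
   entries, combines the partial sums with one MPIU_Allreduce, and takes the square root. A minimal usage
   sketch (placeholder matrix "A"):

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);

   NORM_1 (max column sum) and NORM_INFINITY (max row sum) follow below; NORM_2 is not supported here.
*/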
(type == NORM_1) { /* max column norm */ 1829 PetscReal *tmp,*tmp2; 1830 PetscInt *jj,*garray = aij->garray; 1831 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1832 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1833 *norm = 0.0; 1834 v = amat->a; jj = amat->j; 1835 for (j=0; j<amat->nz; j++) { 1836 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1837 } 1838 v = bmat->a; jj = bmat->j; 1839 for (j=0; j<bmat->nz; j++) { 1840 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1841 } 1842 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1843 for (j=0; j<mat->cmap->N; j++) { 1844 if (tmp2[j] > *norm) *norm = tmp2[j]; 1845 } 1846 ierr = PetscFree(tmp);CHKERRQ(ierr); 1847 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1848 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1849 } else if (type == NORM_INFINITY) { /* max row norm */ 1850 PetscReal ntemp = 0.0; 1851 for (j=0; j<aij->A->rmap->n; j++) { 1852 v = amat->a + amat->i[j]; 1853 sum = 0.0; 1854 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1855 sum += PetscAbsScalar(*v); v++; 1856 } 1857 v = bmat->a + bmat->i[j]; 1858 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1859 sum += PetscAbsScalar(*v); v++; 1860 } 1861 if (sum > ntemp) ntemp = sum; 1862 } 1863 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1864 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1865 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1866 } 1867 PetscFunctionReturn(0); 1868 } 1869 1870 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1871 { 1872 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 1873 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 1874 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 1875 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 1876 PetscErrorCode ierr; 1877 Mat B,A_diag,*B_diag; 1878 const MatScalar *pbv,*bv; 1879 1880 PetscFunctionBegin; 1881 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1882 ai = Aloc->i; aj = Aloc->j; 1883 bi = Bloc->i; bj = Bloc->j; 1884 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1885 PetscInt *d_nnz,*g_nnz,*o_nnz; 1886 PetscSFNode *oloc; 1887 PETSC_UNUSED PetscSF sf; 1888 1889 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1890 /* compute d_nnz for preallocation */ 1891 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 1892 for (i=0; i<ai[ma]; i++) { 1893 d_nnz[aj[i]]++; 1894 } 1895 /* compute local off-diagonal contributions */ 1896 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 1897 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1898 /* map those to global */ 1899 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1900 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1901 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1902 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 1903 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1904 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1905 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1906 1907 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1908 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1909 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1910 
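/*
   The counts computed above (d_nnz from the local column occupancy of the diagonal block, o_nnz reduced
   over the star forest from the off-process column usage) preallocate the transpose exactly. A minimal
   usage sketch of the public interface, assuming "A" is an assembled MATMPIAIJ:

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatDestroy(&At);CHKERRQ(ierr);

   MAT_REUSE_MATRIX assumes At came from an earlier MAT_INITIAL_MATRIX call on a matrix with the same
   nonzero pattern.
*/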
ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1911 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1912 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1913 } else { 1914 B = *matout; 1915 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1916 } 1917 1918 b = (Mat_MPIAIJ*)B->data; 1919 A_diag = a->A; 1920 B_diag = &b->A; 1921 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 1922 A_diag_ncol = A_diag->cmap->N; 1923 B_diag_ilen = sub_B_diag->ilen; 1924 B_diag_i = sub_B_diag->i; 1925 1926 /* Set ilen for diagonal of B */ 1927 for (i=0; i<A_diag_ncol; i++) { 1928 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 1929 } 1930 1931 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 1932 very quickly (=without using MatSetValues), because all writes are local. */ 1933 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 1934 1935 /* copy over the B part */ 1936 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 1937 ierr = MatSeqAIJGetArrayRead(a->B,&bv);CHKERRQ(ierr); 1938 pbv = bv; 1939 row = A->rmap->rstart; 1940 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1941 cols_tmp = cols; 1942 for (i=0; i<mb; i++) { 1943 ncol = bi[i+1]-bi[i]; 1944 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES);CHKERRQ(ierr); 1945 row++; 1946 pbv += ncol; cols_tmp += ncol; 1947 } 1948 ierr = PetscFree(cols);CHKERRQ(ierr); 1949 ierr = MatSeqAIJRestoreArrayRead(a->B,&bv);CHKERRQ(ierr); 1950 1951 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1952 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1953 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1954 *matout = B; 1955 } else { 1956 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 1957 } 1958 PetscFunctionReturn(0); 1959 } 1960 1961 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1962 { 1963 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1964 Mat a = aij->A,b = aij->B; 1965 PetscErrorCode ierr; 1966 PetscInt s1,s2,s3; 1967 1968 PetscFunctionBegin; 1969 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1970 if (rr) { 1971 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1972 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1973 /* Overlap communication with computation. 
*/ 1974 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1975 } 1976 if (ll) { 1977 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1978 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1979 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1980 } 1981 /* scale the diagonal block */ 1982 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1983 1984 if (rr) { 1985 /* Do a scatter end and then right scale the off-diagonal block */ 1986 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1987 ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr); 1988 } 1989 PetscFunctionReturn(0); 1990 } 1991 1992 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1993 { 1994 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1995 PetscErrorCode ierr; 1996 1997 PetscFunctionBegin; 1998 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1999 PetscFunctionReturn(0); 2000 } 2001 2002 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2003 { 2004 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2005 Mat a,b,c,d; 2006 PetscBool flg; 2007 PetscErrorCode ierr; 2008 2009 PetscFunctionBegin; 2010 a = matA->A; b = matA->B; 2011 c = matB->A; d = matB->B; 2012 2013 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2014 if (flg) { 2015 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2016 } 2017 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2018 PetscFunctionReturn(0); 2019 } 2020 2021 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2022 { 2023 PetscErrorCode ierr; 2024 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2025 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2026 2027 PetscFunctionBegin; 2028 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2029 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2030 /* because of the column compression in the off-processor part of the matrix a->B, 2031 the number of columns in a->B and b->B may be different, hence we cannot call 2032 the MatCopy() directly on the two parts. If need be, we can provide a more 2033 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2034 then copying the submatrices */ 2035 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2036 } else { 2037 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2038 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2039 } 2040 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2041 PetscFunctionReturn(0); 2042 } 2043 2044 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2045 { 2046 PetscErrorCode ierr; 2047 2048 PetscFunctionBegin; 2049 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr); 2050 PetscFunctionReturn(0); 2051 } 2052 2053 /* 2054 Computes the number of nonzeros per row needed for preallocation when X and Y 2055 have different nonzero structure. 
2056 */ 2057 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2058 { 2059 PetscInt i,j,k,nzx,nzy; 2060 2061 PetscFunctionBegin; 2062 /* Set the number of nonzeros in the new matrix */ 2063 for (i=0; i<m; i++) { 2064 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2065 nzx = xi[i+1] - xi[i]; 2066 nzy = yi[i+1] - yi[i]; 2067 nnz[i] = 0; 2068 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2069 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2070 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2071 nnz[i]++; 2072 } 2073 for (; k<nzy; k++) nnz[i]++; 2074 } 2075 PetscFunctionReturn(0); 2076 } 2077 2078 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2079 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2080 { 2081 PetscErrorCode ierr; 2082 PetscInt m = Y->rmap->N; 2083 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2084 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2085 2086 PetscFunctionBegin; 2087 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2088 PetscFunctionReturn(0); 2089 } 2090 2091 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2092 { 2093 PetscErrorCode ierr; 2094 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2095 2096 PetscFunctionBegin; 2097 if (str == SAME_NONZERO_PATTERN) { 2098 ierr = MatAXPY(yy->A,a,xx->A,str);CHKERRQ(ierr); 2099 ierr = MatAXPY(yy->B,a,xx->B,str);CHKERRQ(ierr); 2100 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2101 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2102 } else { 2103 Mat B; 2104 PetscInt *nnz_d,*nnz_o; 2105 2106 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2107 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2108 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2109 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2110 ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr); 2111 ierr = MatSetType(B,((PetscObject)Y)->type_name);CHKERRQ(ierr); 2112 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2113 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2114 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2115 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2116 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2117 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2118 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2119 } 2120 PetscFunctionReturn(0); 2121 } 2122 2123 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2124 2125 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2126 { 2127 #if defined(PETSC_USE_COMPLEX) 2128 PetscErrorCode ierr; 2129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2130 2131 PetscFunctionBegin; 2132 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2133 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2134 #else 2135 PetscFunctionBegin; 2136 #endif 2137 PetscFunctionReturn(0); 2138 } 2139 2140 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2141 { 2142 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2143 PetscErrorCode ierr; 2144 2145 PetscFunctionBegin; 2146 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2147 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2148 
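/*
   MatAXPY_MPIAIJ() above selects its strategy from the MatStructure argument: SAME_NONZERO_PATTERN updates
   the two sequential blocks directly, SUBSET_NONZERO_PATTERN falls back to MatAXPY_Basic(), and anything
   else builds a freshly preallocated sum and replaces Y's header. A minimal usage sketch computing
   Y <- Y + 2*X with placeholder matrices:

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/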
PetscFunctionReturn(0); 2149 } 2150 2151 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2152 { 2153 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2154 PetscErrorCode ierr; 2155 2156 PetscFunctionBegin; 2157 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2158 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2159 PetscFunctionReturn(0); 2160 } 2161 2162 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2163 { 2164 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2165 PetscErrorCode ierr; 2166 PetscInt i,*idxb = NULL,m = A->rmap->n; 2167 PetscScalar *va,*vv; 2168 Vec vB,vA; 2169 const PetscScalar *vb; 2170 2171 PetscFunctionBegin; 2172 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr); 2173 ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr); 2174 2175 ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr); 2176 if (idx) { 2177 for (i=0; i<m; i++) { 2178 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2179 } 2180 } 2181 2182 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr); 2183 ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr); 2184 ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr); 2185 2186 ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr); 2187 ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr); 2188 for (i=0; i<m; i++) { 2189 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2190 vv[i] = vb[i]; 2191 if (idx) idx[i] = a->garray[idxb[i]]; 2192 } else { 2193 vv[i] = va[i]; 2194 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) 2195 idx[i] = a->garray[idxb[i]]; 2196 } 2197 } 2198 ierr = VecRestoreArrayWrite(v,&vv);CHKERRQ(ierr); 2199 ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr); 2200 ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr); 2201 ierr = PetscFree(idxb);CHKERRQ(ierr); 2202 ierr = VecDestroy(&vA);CHKERRQ(ierr); 2203 ierr = VecDestroy(&vB);CHKERRQ(ierr); 2204 PetscFunctionReturn(0); 2205 } 2206 2207 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2208 { 2209 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2210 PetscInt m = A->rmap->n,n = A->cmap->n; 2211 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2212 PetscInt *cmap = mat->garray; 2213 PetscInt *diagIdx, *offdiagIdx; 2214 Vec diagV, offdiagV; 2215 PetscScalar *a, *diagA, *offdiagA; 2216 const PetscScalar *ba,*bav; 2217 PetscInt r,j,col,ncols,*bi,*bj; 2218 PetscErrorCode ierr; 2219 Mat B = mat->B; 2220 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2221 2222 PetscFunctionBegin; 2223 /* When a process holds entire A and other processes have no entry */ 2224 if (A->cmap->N == n) { 2225 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2226 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2227 ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr); 2228 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2229 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2230 PetscFunctionReturn(0); 2231 } else if (n == 0) { 2232 if (m) { 2233 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2234 for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;} 2235 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2236 } 2237 PetscFunctionReturn(0); 2238 } 2239 2240 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2241 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2242 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2243 ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2244 2245 /* Get offdiagIdx[] for implicit 0.0 */ 2246 ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr); 2247 ba = bav; 2248 bi = b->i; 2249 bj = b->j;
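/*
   A minimal usage sketch for the row-wise reductions implemented here (MatGetRowMaxAbs() and the related
   min/max routines); "A" is a placeholder for an assembled matrix and the index array may be NULL when the
   column locations are not needed:

     Vec      rowmax;
     PetscInt m,*loc;
     ierr = MatCreateVecs(A,NULL,&rowmax);CHKERRQ(ierr);
     ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
     ierr = PetscMalloc1(m,&loc);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,rowmax,loc);CHKERRQ(ierr);
     ierr = PetscFree(loc);CHKERRQ(ierr);
     ierr = VecDestroy(&rowmax);CHKERRQ(ierr);
*/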
2250 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2251 for (r = 0; r < m; r++) { 2252 ncols = bi[r+1] - bi[r]; 2253 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2254 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2255 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2256 offdiagA[r] = 0.0; 2257 2258 /* Find first hole in the cmap */ 2259 for (j=0; j<ncols; j++) { 2260 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2261 if (col > j && j < cstart) { 2262 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2263 break; 2264 } else if (col > j + n && j >= cstart) { 2265 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2266 break; 2267 } 2268 } 2269 if (j == ncols && ncols < A->cmap->N - n) { 2270 /* a hole is outside compressed Bcols */ 2271 if (ncols == 0) { 2272 if (cstart) { 2273 offdiagIdx[r] = 0; 2274 } else offdiagIdx[r] = cend; 2275 } else { /* ncols > 0 */ 2276 offdiagIdx[r] = cmap[ncols-1] + 1; 2277 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2278 } 2279 } 2280 } 2281 2282 for (j=0; j<ncols; j++) { 2283 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2284 ba++; bj++; 2285 } 2286 } 2287 2288 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2289 ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2290 for (r = 0; r < m; ++r) { 2291 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2292 a[r] = diagA[r]; 2293 if (idx) idx[r] = cstart + diagIdx[r]; 2294 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2295 a[r] = diagA[r]; 2296 if (idx) { 2297 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2298 idx[r] = cstart + diagIdx[r]; 2299 } else idx[r] = offdiagIdx[r]; 2300 } 2301 } else { 2302 a[r] = offdiagA[r]; 2303 if (idx) idx[r] = offdiagIdx[r]; 2304 } 2305 } 2306 ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr); 2307 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2308 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2309 ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2310 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2311 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2312 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2313 PetscFunctionReturn(0); 2314 } 2315 2316 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2317 { 2318 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2319 PetscInt m = A->rmap->n,n = A->cmap->n; 2320 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2321 PetscInt *cmap = mat->garray; 2322 PetscInt *diagIdx, *offdiagIdx; 2323 Vec diagV, offdiagV; 2324 PetscScalar *a, *diagA, *offdiagA; 2325 const PetscScalar *ba,*bav; 2326 PetscInt r,j,col,ncols,*bi,*bj; 2327 PetscErrorCode ierr; 2328 Mat B = mat->B; 2329 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2330 2331 PetscFunctionBegin; 2332 /* When a process holds entire A and other processes have no entry */ 2333 if (A->cmap->N == n) { 2334 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2335 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2336 ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr); 2337 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2338 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2339 PetscFunctionReturn(0); 2340 } else if (n == 0) { 2341 if (m) { 2342 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2343 for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;} 2344 ierr = 
VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2345 } 2346 PetscFunctionReturn(0); 2347 } 2348 2349 ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2350 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2351 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2352 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2353 2354 /* Get offdiagIdx[] for implicit 0.0 */ 2355 ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr); 2356 ba = bav; 2357 bi = b->i; 2358 bj = b->j; 2359 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2360 for (r = 0; r < m; r++) { 2361 ncols = bi[r+1] - bi[r]; 2362 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2363 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2364 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2365 offdiagA[r] = 0.0; 2366 2367 /* Find first hole in the cmap */ 2368 for (j=0; j<ncols; j++) { 2369 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2370 if (col > j && j < cstart) { 2371 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2372 break; 2373 } else if (col > j + n && j >= cstart) { 2374 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2375 break; 2376 } 2377 } 2378 if (j == ncols && ncols < A->cmap->N - n) { 2379 /* a hole is outside compressed Bcols */ 2380 if (ncols == 0) { 2381 if (cstart) { 2382 offdiagIdx[r] = 0; 2383 } else offdiagIdx[r] = cend; 2384 } else { /* ncols > 0 */ 2385 offdiagIdx[r] = cmap[ncols-1] + 1; 2386 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2387 } 2388 } 2389 } 2390 2391 for (j=0; j<ncols; j++) { 2392 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2393 ba++; bj++; 2394 } 2395 } 2396 2397 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2398 ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2399 for (r = 0; r < m; ++r) { 2400 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2401 a[r] = diagA[r]; 2402 if (idx) idx[r] = cstart + diagIdx[r]; 2403 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2404 a[r] = diagA[r]; 2405 if (idx) { 2406 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2407 idx[r] = cstart + diagIdx[r]; 2408 } else idx[r] = offdiagIdx[r]; 2409 } 2410 } else { 2411 a[r] = offdiagA[r]; 2412 if (idx) idx[r] = offdiagIdx[r]; 2413 } 2414 } 2415 ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr); 2416 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2417 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2418 ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2419 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2420 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2421 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2422 PetscFunctionReturn(0); 2423 } 2424 2425 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2426 { 2427 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 2428 PetscInt m = A->rmap->n,n = A->cmap->n; 2429 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2430 PetscInt *cmap = mat->garray; 2431 PetscInt *diagIdx, *offdiagIdx; 2432 Vec diagV, offdiagV; 2433 PetscScalar *a, *diagA, *offdiagA; 2434 const PetscScalar *ba,*bav; 2435 PetscInt r,j,col,ncols,*bi,*bj; 2436 PetscErrorCode ierr; 2437 Mat B = mat->B; 2438 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2439 2440 PetscFunctionBegin; 2441 /* When a process holds entire A and other processes have no entry */ 2442 if (A->cmap->N == n) { 2443 ierr = 
VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2444 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2445 ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr); 2446 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2447 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2448 PetscFunctionReturn(0); 2449 } else if (n == 0) { 2450 if (m) { 2451 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2452 for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;} 2453 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2454 } 2455 PetscFunctionReturn(0); 2456 } 2457 2458 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2459 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2460 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2461 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2462 2463 /* Get offdiagIdx[] for implicit 0.0 */ 2464 ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr); 2465 ba = bav; 2466 bi = b->i; 2467 bj = b->j; 2468 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2469 for (r = 0; r < m; r++) { 2470 ncols = bi[r+1] - bi[r]; 2471 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2472 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2473 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2474 offdiagA[r] = 0.0; 2475 2476 /* Find first hole in the cmap */ 2477 for (j=0; j<ncols; j++) { 2478 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2479 if (col > j && j < cstart) { 2480 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2481 break; 2482 } else if (col > j + n && j >= cstart) { 2483 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2484 break; 2485 } 2486 } 2487 if (j == ncols && ncols < A->cmap->N - n) { 2488 /* a hole is outside compressed Bcols */ 2489 if (ncols == 0) { 2490 if (cstart) { 2491 offdiagIdx[r] = 0; 2492 } else offdiagIdx[r] = cend; 2493 } else { /* ncols > 0 */ 2494 offdiagIdx[r] = cmap[ncols-1] + 1; 2495 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2496 } 2497 } 2498 } 2499 2500 for (j=0; j<ncols; j++) { 2501 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2502 ba++; bj++; 2503 } 2504 } 2505 2506 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2507 ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr); 2508 for (r = 0; r < m; ++r) { 2509 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2510 a[r] = diagA[r]; 2511 if (idx) idx[r] = cstart + diagIdx[r]; 2512 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2513 a[r] = diagA[r]; 2514 if (idx) { 2515 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2516 idx[r] = cstart + diagIdx[r]; 2517 } else idx[r] = offdiagIdx[r]; 2518 } 2519 } else { 2520 a[r] = offdiagA[r]; 2521 if (idx) idx[r] = offdiagIdx[r]; 2522 } 2523 } 2524 ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr); 2525 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2526 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2527 ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr); 2528 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2529 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2530 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2531 PetscFunctionReturn(0); 2532 } 2533 2534 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2535 { 2536 PetscErrorCode ierr; 2537 Mat *dummy; 2538 2539 PetscFunctionBegin; 2540 ierr = 
MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2541 *newmat = *dummy; 2542 ierr = PetscFree(dummy);CHKERRQ(ierr); 2543 PetscFunctionReturn(0); 2544 } 2545 2546 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2547 { 2548 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2549 PetscErrorCode ierr; 2550 2551 PetscFunctionBegin; 2552 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2553 A->factorerrortype = a->A->factorerrortype; 2554 PetscFunctionReturn(0); 2555 } 2556 2557 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2558 { 2559 PetscErrorCode ierr; 2560 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2561 2562 PetscFunctionBegin; 2563 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2564 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2565 if (x->assembled) { 2566 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2567 } else { 2568 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2569 } 2570 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2571 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2572 PetscFunctionReturn(0); 2573 } 2574 2575 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2576 { 2577 PetscFunctionBegin; 2578 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2579 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2580 PetscFunctionReturn(0); 2581 } 2582 2583 /*@ 2584 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2585 2586 Collective on Mat 2587 2588 Input Parameters: 2589 + A - the matrix 2590 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2591 2592 Level: advanced 2593 2594 @*/ 2595 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2596 { 2597 PetscErrorCode ierr; 2598 2599 PetscFunctionBegin; 2600 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2601 PetscFunctionReturn(0); 2602 } 2603 2604 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2605 { 2606 PetscErrorCode ierr; 2607 PetscBool sc = PETSC_FALSE,flg; 2608 2609 PetscFunctionBegin; 2610 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2611 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2612 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2613 if (flg) { 2614 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2615 } 2616 ierr = PetscOptionsTail();CHKERRQ(ierr); 2617 PetscFunctionReturn(0); 2618 } 2619 2620 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2621 { 2622 PetscErrorCode ierr; 2623 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2624 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2625 2626 PetscFunctionBegin; 2627 if (!Y->preallocated) { 2628 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2629 } else if (!aij->nz) { 2630 PetscInt nonew = aij->nonew; 2631 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2632 aij->nonew = nonew; 2633 } 2634 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2635 
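/*
   The scalable-overlap switch documented above only changes which MatIncreaseOverlap implementation the
   matrix uses. A minimal sketch, with "A" a placeholder MATMPIAIJ:

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);

   or equivalently run with -mat_increase_overlap_scalable 1 and let MatSetFromOptions() pick it up.
*/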
PetscFunctionReturn(0); 2636 } 2637 2638 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2639 { 2640 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2641 PetscErrorCode ierr; 2642 2643 PetscFunctionBegin; 2644 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2645 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2646 if (d) { 2647 PetscInt rstart; 2648 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2649 *d += rstart; 2650 2651 } 2652 PetscFunctionReturn(0); 2653 } 2654 2655 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2656 { 2657 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2658 PetscErrorCode ierr; 2659 2660 PetscFunctionBegin; 2661 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2662 PetscFunctionReturn(0); 2663 } 2664 2665 /* -------------------------------------------------------------------*/ 2666 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2667 MatGetRow_MPIAIJ, 2668 MatRestoreRow_MPIAIJ, 2669 MatMult_MPIAIJ, 2670 /* 4*/ MatMultAdd_MPIAIJ, 2671 MatMultTranspose_MPIAIJ, 2672 MatMultTransposeAdd_MPIAIJ, 2673 NULL, 2674 NULL, 2675 NULL, 2676 /*10*/ NULL, 2677 NULL, 2678 NULL, 2679 MatSOR_MPIAIJ, 2680 MatTranspose_MPIAIJ, 2681 /*15*/ MatGetInfo_MPIAIJ, 2682 MatEqual_MPIAIJ, 2683 MatGetDiagonal_MPIAIJ, 2684 MatDiagonalScale_MPIAIJ, 2685 MatNorm_MPIAIJ, 2686 /*20*/ MatAssemblyBegin_MPIAIJ, 2687 MatAssemblyEnd_MPIAIJ, 2688 MatSetOption_MPIAIJ, 2689 MatZeroEntries_MPIAIJ, 2690 /*24*/ MatZeroRows_MPIAIJ, 2691 NULL, 2692 NULL, 2693 NULL, 2694 NULL, 2695 /*29*/ MatSetUp_MPIAIJ, 2696 NULL, 2697 NULL, 2698 MatGetDiagonalBlock_MPIAIJ, 2699 NULL, 2700 /*34*/ MatDuplicate_MPIAIJ, 2701 NULL, 2702 NULL, 2703 NULL, 2704 NULL, 2705 /*39*/ MatAXPY_MPIAIJ, 2706 MatCreateSubMatrices_MPIAIJ, 2707 MatIncreaseOverlap_MPIAIJ, 2708 MatGetValues_MPIAIJ, 2709 MatCopy_MPIAIJ, 2710 /*44*/ MatGetRowMax_MPIAIJ, 2711 MatScale_MPIAIJ, 2712 MatShift_MPIAIJ, 2713 MatDiagonalSet_MPIAIJ, 2714 MatZeroRowsColumns_MPIAIJ, 2715 /*49*/ MatSetRandom_MPIAIJ, 2716 NULL, 2717 NULL, 2718 NULL, 2719 NULL, 2720 /*54*/ MatFDColoringCreate_MPIXAIJ, 2721 NULL, 2722 MatSetUnfactored_MPIAIJ, 2723 MatPermute_MPIAIJ, 2724 NULL, 2725 /*59*/ MatCreateSubMatrix_MPIAIJ, 2726 MatDestroy_MPIAIJ, 2727 MatView_MPIAIJ, 2728 NULL, 2729 NULL, 2730 /*64*/ NULL, 2731 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2732 NULL, 2733 NULL, 2734 NULL, 2735 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2736 MatGetRowMinAbs_MPIAIJ, 2737 NULL, 2738 NULL, 2739 NULL, 2740 NULL, 2741 /*75*/ MatFDColoringApply_AIJ, 2742 MatSetFromOptions_MPIAIJ, 2743 NULL, 2744 NULL, 2745 MatFindZeroDiagonals_MPIAIJ, 2746 /*80*/ NULL, 2747 NULL, 2748 NULL, 2749 /*83*/ MatLoad_MPIAIJ, 2750 MatIsSymmetric_MPIAIJ, 2751 NULL, 2752 NULL, 2753 NULL, 2754 NULL, 2755 /*89*/ NULL, 2756 NULL, 2757 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2758 NULL, 2759 NULL, 2760 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2761 NULL, 2762 NULL, 2763 NULL, 2764 MatBindToCPU_MPIAIJ, 2765 /*99*/ MatProductSetFromOptions_MPIAIJ, 2766 NULL, 2767 NULL, 2768 MatConjugate_MPIAIJ, 2769 NULL, 2770 /*104*/MatSetValuesRow_MPIAIJ, 2771 MatRealPart_MPIAIJ, 2772 MatImaginaryPart_MPIAIJ, 2773 NULL, 2774 NULL, 2775 /*109*/NULL, 2776 NULL, 2777 MatGetRowMin_MPIAIJ, 2778 NULL, 2779 MatMissingDiagonal_MPIAIJ, 2780 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2781 NULL, 2782 MatGetGhosts_MPIAIJ, 2783 NULL, 2784 NULL, 2785 /*119*/MatMultDiagonalBlock_MPIAIJ, 2786 
NULL, 2787 NULL, 2788 NULL, 2789 MatGetMultiProcBlock_MPIAIJ, 2790 /*124*/MatFindNonzeroRows_MPIAIJ, 2791 MatGetColumnNorms_MPIAIJ, 2792 MatInvertBlockDiagonal_MPIAIJ, 2793 MatInvertVariableBlockDiagonal_MPIAIJ, 2794 MatCreateSubMatricesMPI_MPIAIJ, 2795 /*129*/NULL, 2796 NULL, 2797 NULL, 2798 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2799 NULL, 2800 /*134*/NULL, 2801 NULL, 2802 NULL, 2803 NULL, 2804 NULL, 2805 /*139*/MatSetBlockSizes_MPIAIJ, 2806 NULL, 2807 NULL, 2808 MatFDColoringSetUp_MPIXAIJ, 2809 MatFindOffBlockDiagonalEntries_MPIAIJ, 2810 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2811 /*145*/NULL, 2812 NULL, 2813 NULL 2814 }; 2815 2816 /* ----------------------------------------------------------------------------------------*/ 2817 2818 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2819 { 2820 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2821 PetscErrorCode ierr; 2822 2823 PetscFunctionBegin; 2824 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2825 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2826 PetscFunctionReturn(0); 2827 } 2828 2829 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2830 { 2831 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2832 PetscErrorCode ierr; 2833 2834 PetscFunctionBegin; 2835 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2836 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2837 PetscFunctionReturn(0); 2838 } 2839 2840 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2841 { 2842 Mat_MPIAIJ *b; 2843 PetscErrorCode ierr; 2844 PetscMPIInt size; 2845 2846 PetscFunctionBegin; 2847 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2848 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2849 b = (Mat_MPIAIJ*)B->data; 2850 2851 #if defined(PETSC_USE_CTABLE) 2852 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2853 #else 2854 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2855 #endif 2856 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2857 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2858 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2859 2860 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2861 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 2862 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2863 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2864 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 2865 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2866 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2867 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2868 2869 if (!B->preallocated) { 2870 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2871 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2872 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2873 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2874 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2875 } 2876 2877 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2878 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2879 B->preallocated = PETSC_TRUE; 2880 B->was_assembled = PETSC_FALSE; 2881 B->assembled = PETSC_FALSE; 2882 PetscFunctionReturn(0); 2883 } 2884 2885 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2886 { 2887 Mat_MPIAIJ *b; 2888 PetscErrorCode ierr; 2889 2890 PetscFunctionBegin; 2891 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2892 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2893 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2894 b = (Mat_MPIAIJ*)B->data; 2895 2896 #if defined(PETSC_USE_CTABLE) 2897 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2898 #else 2899 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2900 #endif 2901 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2902 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2903 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2904 2905 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2906 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2907 B->preallocated = PETSC_TRUE; 2908 B->was_assembled = PETSC_FALSE; 2909 B->assembled = PETSC_FALSE; 2910 PetscFunctionReturn(0); 2911 } 2912 2913 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2914 { 2915 Mat mat; 2916 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2917 PetscErrorCode ierr; 2918 2919 PetscFunctionBegin; 2920 *newmat = NULL; 2921 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2922 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2923 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2924 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2925 a = (Mat_MPIAIJ*)mat->data; 2926 2927 mat->factortype = matin->factortype; 2928 mat->assembled = matin->assembled; 2929 mat->insertmode = NOT_SET_VALUES; 2930 mat->preallocated = matin->preallocated; 2931 2932 a->size = oldmat->size; 2933 a->rank = oldmat->rank; 2934 a->donotstash = oldmat->donotstash; 2935 a->roworiented = oldmat->roworiented; 2936 a->rowindices = NULL; 2937 a->rowvalues = NULL; 2938 a->getrowactive = PETSC_FALSE; 2939 2940 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2941 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2942 2943 if (oldmat->colmap) { 2944 #if defined(PETSC_USE_CTABLE) 2945 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2946 #else 2947 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2948 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2949 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 2950 #endif 2951 } else a->colmap = NULL; 2952 if (oldmat->garray) { 2953 PetscInt len; 2954 len = oldmat->B->cmap->n; 2955 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2956 
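/*
   MatMPIAIJSetPreallocation_MPIAIJ() above sizes the diagonal block (columns owned by this rank) and the
   off-diagonal block separately, which is why the interface takes two nz/nnz pairs. A minimal usage
   sketch, assuming roughly 5 nonzeros per row fall in the diagonal block and 2 elsewhere ("M" and "N" are
   placeholder global sizes):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);

   Exact per-row counts can be passed through the d_nnz/o_nnz arrays instead of the scalar estimates.
*/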
ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2957 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 2958 } else a->garray = NULL; 2959 2960 /* It may happen MatDuplicate is called with a non-assembled matrix 2961 In fact, MatDuplicate only requires the matrix to be preallocated 2962 This may happen inside a DMCreateMatrix_Shell */ 2963 if (oldmat->lvec) { 2964 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2965 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2966 } 2967 if (oldmat->Mvctx) { 2968 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2969 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2970 } 2971 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2972 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2973 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2974 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2975 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2976 *newmat = mat; 2977 PetscFunctionReturn(0); 2978 } 2979 2980 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2981 { 2982 PetscBool isbinary, ishdf5; 2983 PetscErrorCode ierr; 2984 2985 PetscFunctionBegin; 2986 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2987 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2988 /* force binary viewer to load .info file if it has not yet done so */ 2989 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2990 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2991 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2992 if (isbinary) { 2993 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 2994 } else if (ishdf5) { 2995 #if defined(PETSC_HAVE_HDF5) 2996 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 2997 #else 2998 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2999 #endif 3000 } else { 3001 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 3002 } 3003 PetscFunctionReturn(0); 3004 } 3005 3006 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3007 { 3008 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 3009 PetscInt *rowidxs,*colidxs; 3010 PetscScalar *matvals; 3011 PetscErrorCode ierr; 3012 3013 PetscFunctionBegin; 3014 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3015 3016 /* read in matrix header */ 3017 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3018 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3019 M = header[1]; N = header[2]; nz = header[3]; 3020 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 3021 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N); 3022 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 3023 3024 /* set block sizes from the viewer's .info file */ 3025 ierr = 
MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3026 /* set global sizes if not set already */ 3027 if (mat->rmap->N < 0) mat->rmap->N = M; 3028 if (mat->cmap->N < 0) mat->cmap->N = N; 3029 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3030 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3031 3032 /* check if the matrix sizes are correct */ 3033 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3034 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 3035 3036 /* read in row lengths and build row indices */ 3037 ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr); 3038 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3039 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 3040 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3041 ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr); 3042 if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 3043 /* read in column indices and matrix values */ 3044 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 3045 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 3046 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 3047 /* store matrix indices and values */ 3048 ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr); 3049 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3050 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3051 PetscFunctionReturn(0); 3052 } 3053 3054 /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 3055 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3056 { 3057 PetscErrorCode ierr; 3058 IS iscol_local; 3059 PetscBool isstride; 3060 PetscMPIInt lisstride=0,gisstride; 3061 3062 PetscFunctionBegin; 3063 /* check if we are grabbing all columns*/ 3064 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3065 3066 if (isstride) { 3067 PetscInt start,len,mstart,mlen; 3068 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3069 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3070 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3071 if (mstart == start && mlen-mstart == len) lisstride = 1; 3072 } 3073 3074 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3075 if (gisstride) { 3076 PetscInt N; 3077 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3078 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr); 3079 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3080 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3081 } else { 3082 PetscInt cbs; 3083 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3084 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3085 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3086 } 3087 3088 *isseq = iscol_local; 3089 PetscFunctionReturn(0); 3090 } 3091 3092 /* 3093 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3094 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3095 3096 Input Parameters: 3097 mat - matrix 3098 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3099 i.e., mat->rstart <= isrow[i] < mat->rend 3100 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3101 i.e., mat->cstart <= iscol[i] < mat->cend 3102 Output Parameter: 3103 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3104 iscol_o - sequential column index set for retrieving mat->B 3105 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3106 */ 3107 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3108 { 3109 PetscErrorCode ierr; 3110 Vec x,cmap; 3111 const PetscInt *is_idx; 3112 PetscScalar *xarray,*cmaparray; 3113 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3114 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3115 Mat B=a->B; 3116 Vec lvec=a->lvec,lcmap; 3117 PetscInt i,cstart,cend,Bn=B->cmap->N; 3118 MPI_Comm comm; 3119 VecScatter Mvctx=a->Mvctx; 3120 3121 PetscFunctionBegin; 3122 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3123 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3124 3125 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3126 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3127 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3128 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3129 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3130 3131 /* Get start indices */ 3132 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3133 isstart -= ncols; 3134 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3135 3136 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3137 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3138 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3139 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3140 for (i=0; i<ncols; i++) { 3141 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3142 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3143 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3144 } 3145 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3146 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3147 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3148 3149 /* Get iscol_d */ 3150 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3151 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3152 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3153 3154 /* Get isrow_d */ 3155 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3156 rstart = mat->rmap->rstart; 3157 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3158 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3159 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3160 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3161 3162 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3163 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3164 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3165 3166 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3167 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3168 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3169 3170 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3171 3172 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3173 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3174 3175 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3176 /* off-process column indices */ 3177 count = 0; 3178 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3179 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3180 3181 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3182 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3183 for (i=0; i<Bn; i++) { 3184 if (PetscRealPart(xarray[i]) > -1.0) { 3185 idx[count] = i; /* local column index in off-diagonal part B */ 3186 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3187 count++; 3188 } 3189 } 3190 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3191 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3192 3193 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3194 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3195 3196 ierr = PetscFree(idx);CHKERRQ(ierr); 3197 *garray = cmap1; 3198 3199 ierr = VecDestroy(&x);CHKERRQ(ierr); 3200 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3201 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3202 PetscFunctionReturn(0); 3203 } 3204 3205 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3206 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3207 { 3208 PetscErrorCode ierr; 3209 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3210 Mat M = NULL; 3211 MPI_Comm comm; 3212 IS iscol_d,isrow_d,iscol_o; 3213 Mat Asub = NULL,Bsub = NULL; 3214 PetscInt n; 3215 3216 PetscFunctionBegin; 3217 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3218 3219 if (call == MAT_REUSE_MATRIX) { 3220 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3221 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3222 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3223 3224 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3225 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3226 3227 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3228 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3229 3230 /* Update diagonal and off-diagonal portions of submat */ 3231 asub = (Mat_MPIAIJ*)(*submat)->data; 3232 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3233 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3234 if (n) { 3235 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3236 } 3237 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3238 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3239 3240 } else { /* call == MAT_INITIAL_MATRIX) */ 3241 const PetscInt *garray; 3242 PetscInt BsubN; 3243 3244 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3245 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3246 3247 /* Create local submatrices Asub and Bsub */ 3248 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3249 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3250 3251 /* Create submatrix M */ 3252 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3253 3254 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3255 asub = (Mat_MPIAIJ*)M->data; 3256 3257 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3258 n = asub->B->cmap->N; 3259 if (BsubN > n) { 3260 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3261 const PetscInt *idx; 3262 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3263 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3264 3265 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3266 j = 0; 3267 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3268 for (i=0; i<n; i++) { 3269 if (j >= BsubN) break; 3270 while (subgarray[i] > garray[j]) j++; 3271 3272 if (subgarray[i] == garray[j]) { 3273 idx_new[i] = idx[j++]; 3274 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3275 } 3276 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3277 3278 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3279 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3280 3281 } else if (BsubN < n) { 3282 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3283 } 3284 3285 ierr = PetscFree(garray);CHKERRQ(ierr); 3286 *submat = M; 3287 3288 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3289 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3290 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3291 3292 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3293 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3294 3295 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3296 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3297 } 3298 PetscFunctionReturn(0); 3299 } 3300 3301 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3302 { 3303 PetscErrorCode ierr; 3304 IS iscol_local=NULL,isrow_d; 3305 PetscInt csize; 3306 PetscInt n,i,j,start,end; 3307 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3308 MPI_Comm comm; 3309 3310 PetscFunctionBegin; 3311 /* If isrow has same processor distribution as mat, 3312 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3313 if (call == MAT_REUSE_MATRIX) { 3314 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3315 if (isrow_d) { 3316 sameRowDist = PETSC_TRUE; 3317 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3318 } else { 3319 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3320 if (iscol_local) { 3321 sameRowDist = PETSC_TRUE; 3322 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3323 } 3324 } 3325 } else { 3326 /* Check if isrow has same processor distribution as mat */ 3327 sameDist[0] = 
PETSC_FALSE; 3328 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3329 if (!n) { 3330 sameDist[0] = PETSC_TRUE; 3331 } else { 3332 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3333 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3334 if (i >= start && j < end) { 3335 sameDist[0] = PETSC_TRUE; 3336 } 3337 } 3338 3339 /* Check if iscol has same processor distribution as mat */ 3340 sameDist[1] = PETSC_FALSE; 3341 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3342 if (!n) { 3343 sameDist[1] = PETSC_TRUE; 3344 } else { 3345 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3346 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3347 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3348 } 3349 3350 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3351 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3352 sameRowDist = tsameDist[0]; 3353 } 3354 3355 if (sameRowDist) { 3356 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3357 /* isrow and iscol have same processor distribution as mat */ 3358 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3359 PetscFunctionReturn(0); 3360 } else { /* sameRowDist */ 3361 /* isrow has same processor distribution as mat */ 3362 if (call == MAT_INITIAL_MATRIX) { 3363 PetscBool sorted; 3364 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3365 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3366 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3367 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3368 3369 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3370 if (sorted) { 3371 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3372 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3373 PetscFunctionReturn(0); 3374 } 3375 } else { /* call == MAT_REUSE_MATRIX */ 3376 IS iscol_sub; 3377 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3378 if (iscol_sub) { 3379 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3380 PetscFunctionReturn(0); 3381 } 3382 } 3383 } 3384 } 3385 3386 /* General case: iscol -> iscol_local which has global size of iscol */ 3387 if (call == MAT_REUSE_MATRIX) { 3388 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3389 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3390 } else { 3391 if (!iscol_local) { 3392 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3393 } 3394 } 3395 3396 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3397 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3398 3399 if (call == MAT_INITIAL_MATRIX) { 3400 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3401 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3402 } 3403 PetscFunctionReturn(0); 3404 } 3405 3406 /*@C 3407 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3408 and "off-diagonal" part of the matrix in CSR format. 3409 3410 Collective 3411 3412 Input Parameters: 3413 + comm - MPI communicator 3414 . 
A - "diagonal" portion of matrix 3415 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3416 - garray - global index of B columns 3417 3418 Output Parameter: 3419 . mat - the matrix, with input A as its local diagonal matrix 3420 Level: advanced 3421 3422 Notes: 3423 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3424 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3425 3426 .seealso: MatCreateMPIAIJWithSplitArrays() 3427 @*/ 3428 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3429 { 3430 PetscErrorCode ierr; 3431 Mat_MPIAIJ *maij; 3432 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3433 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3434 const PetscScalar *oa; 3435 Mat Bnew; 3436 PetscInt m,n,N; 3437 3438 PetscFunctionBegin; 3439 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3440 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3441 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3442 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3443 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3444 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3445 3446 /* Get global columns of mat */ 3447 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3448 3449 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3450 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3451 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3452 maij = (Mat_MPIAIJ*)(*mat)->data; 3453 3454 (*mat)->preallocated = PETSC_TRUE; 3455 3456 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3457 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3458 3459 /* Set A as diagonal portion of *mat */ 3460 maij->A = A; 3461 3462 nz = oi[m]; 3463 for (i=0; i<nz; i++) { 3464 col = oj[i]; 3465 oj[i] = garray[col]; 3466 } 3467 3468 /* Set Bnew as off-diagonal portion of *mat */ 3469 ierr = MatSeqAIJGetArrayRead(B,&oa);CHKERRQ(ierr); 3470 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew);CHKERRQ(ierr); 3471 ierr = MatSeqAIJRestoreArrayRead(B,&oa);CHKERRQ(ierr); 3472 bnew = (Mat_SeqAIJ*)Bnew->data; 3473 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3474 maij->B = Bnew; 3475 3476 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3477 3478 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3479 b->free_a = PETSC_FALSE; 3480 b->free_ij = PETSC_FALSE; 3481 ierr = MatDestroy(&B);CHKERRQ(ierr); 3482 3483 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3484 bnew->free_a = PETSC_TRUE; 3485 bnew->free_ij = PETSC_TRUE; 3486 3487 /* condense columns of maij->B */ 3488 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3489 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3490 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3491 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3492 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3493 PetscFunctionReturn(0); 3494 } 3495 3496 
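/*
   A minimal usage sketch of MatCreateMPIAIJWithSeqAIJ() (illustrative only; Aloc, Bloc and garray
   are assumed to have been built already, e.g. by ISGetSeqIS_SameColDist_Private() followed by
   MatCreateSubMatrix_SeqAIJ() as in MatCreateSubMatrix_MPIAIJ_SameRowColDist() above):

     Mat C;
     ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Aloc,Bloc,garray,&C);CHKERRQ(ierr);
     ... use C; Aloc is now owned by C and Bloc has already been destroyed ...
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/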
extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3497 3498 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3499 { 3500 PetscErrorCode ierr; 3501 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3502 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3503 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3504 Mat M,Msub,B=a->B; 3505 MatScalar *aa; 3506 Mat_SeqAIJ *aij; 3507 PetscInt *garray = a->garray,*colsub,Ncols; 3508 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3509 IS iscol_sub,iscmap; 3510 const PetscInt *is_idx,*cmap; 3511 PetscBool allcolumns=PETSC_FALSE; 3512 MPI_Comm comm; 3513 3514 PetscFunctionBegin; 3515 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3516 if (call == MAT_REUSE_MATRIX) { 3517 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3518 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3519 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3520 3521 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3522 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3523 3524 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3525 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3526 3527 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3528 3529 } else { /* call == MAT_INITIAL_MATRIX) */ 3530 PetscBool flg; 3531 3532 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3533 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3534 3535 /* (1) iscol -> nonscalable iscol_local */ 3536 /* Check for special case: each processor gets entire matrix columns */ 3537 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3538 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3539 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3540 if (allcolumns) { 3541 iscol_sub = iscol_local; 3542 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3543 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3544 3545 } else { 3546 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3547 PetscInt *idx,*cmap1,k; 3548 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3549 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3550 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3551 count = 0; 3552 k = 0; 3553 for (i=0; i<Ncols; i++) { 3554 j = is_idx[i]; 3555 if (j >= cstart && j < cend) { 3556 /* diagonal part of mat */ 3557 idx[count] = j; 3558 cmap1[count++] = i; /* column index in submat */ 3559 } else if (Bn) { 3560 /* off-diagonal part of mat */ 3561 if (j == garray[k]) { 3562 idx[count] = j; 3563 cmap1[count++] = i; /* column index in submat */ 3564 } else if (j > garray[k]) { 3565 while (j > garray[k] && k < Bn-1) k++; 3566 if (j == garray[k]) { 3567 idx[count] = j; 3568 cmap1[count++] = i; /* column index in submat */ 3569 } 3570 } 3571 } 3572 } 3573 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3574 3575 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3576 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3577 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3578 3579 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3580 } 3581 3582 /* (3) Create sequential Msub */ 3583 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3584 } 3585 3586 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3587 aij = (Mat_SeqAIJ*)(Msub)->data; 3588 ii = aij->i; 3589 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3590 3591 /* 3592 m - number of local rows 3593 Ncols - number of columns (same on all processors) 3594 rstart - first row in new global matrix generated 3595 */ 3596 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3597 3598 if (call == MAT_INITIAL_MATRIX) { 3599 /* (4) Create parallel newmat */ 3600 PetscMPIInt rank,size; 3601 PetscInt csize; 3602 3603 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 3604 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 3605 3606 /* 3607 Determine the number of non-zeros in the diagonal and off-diagonal 3608 portions of the matrix in order to do correct preallocation 3609 */ 3610 3611 /* first get start and end of "diagonal" columns */ 3612 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3613 if (csize == PETSC_DECIDE) { 3614 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3615 if (mglobal == Ncols) { /* square matrix */ 3616 nlocal = m; 3617 } else { 3618 nlocal = Ncols/size + ((Ncols % size) > rank); 3619 } 3620 } else { 3621 nlocal = csize; 3622 } 3623 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3624 rstart = rend - nlocal; 3625 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3626 3627 /* next, compute all the lengths */ 3628 jj = aij->j; 3629 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3630 olens = dlens + m; 3631 for (i=0; i<m; i++) { 3632 jend = ii[i+1] - ii[i]; 3633 olen = 0; 3634 dlen = 0; 3635 for (j=0; j<jend; j++) { 3636 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3637 else dlen++; 3638 jj++; 3639 } 3640 olens[i] = olen; 3641 dlens[i] = dlen; 3642 } 3643 3644 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3645 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3646 3647 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3648 ierr = 
MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 3649 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3650 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3651 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3652 ierr = PetscFree(dlens);CHKERRQ(ierr); 3653 3654 } else { /* call == MAT_REUSE_MATRIX */ 3655 M = *newmat; 3656 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3657 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3658 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3659 /* 3660 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3661 rather than the slower MatSetValues(). 3662 */ 3663 M->was_assembled = PETSC_TRUE; 3664 M->assembled = PETSC_FALSE; 3665 } 3666 3667 /* (5) Set values of Msub to *newmat */ 3668 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3669 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3670 3671 jj = aij->j; 3672 ierr = MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr); 3673 for (i=0; i<m; i++) { 3674 row = rstart + i; 3675 nz = ii[i+1] - ii[i]; 3676 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3677 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3678 jj += nz; aa += nz; 3679 } 3680 ierr = MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr); 3681 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3682 3683 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3684 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3685 3686 ierr = PetscFree(colsub);CHKERRQ(ierr); 3687 3688 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3689 if (call == MAT_INITIAL_MATRIX) { 3690 *newmat = M; 3691 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3692 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3693 3694 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3695 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3696 3697 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3698 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3699 3700 if (iscol_local) { 3701 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3702 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3703 } 3704 } 3705 PetscFunctionReturn(0); 3706 } 3707 3708 /* 3709 Not great since it makes two copies of the submatrix, first an SeqAIJ 3710 in local and then by concatenating the local matrices the end result. 3711 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3712 3713 Note: This requires a sequential iscol with all indices. 
3714 */ 3715 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3716 { 3717 PetscErrorCode ierr; 3718 PetscMPIInt rank,size; 3719 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3720 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3721 Mat M,Mreuse; 3722 MatScalar *aa,*vwork; 3723 MPI_Comm comm; 3724 Mat_SeqAIJ *aij; 3725 PetscBool colflag,allcolumns=PETSC_FALSE; 3726 3727 PetscFunctionBegin; 3728 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3729 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 3730 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 3731 3732 /* Check for special case: each processor gets entire matrix columns */ 3733 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3734 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3735 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3736 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3737 3738 if (call == MAT_REUSE_MATRIX) { 3739 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3740 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3741 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3742 } else { 3743 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3744 } 3745 3746 /* 3747 m - number of local rows 3748 n - number of columns (same on all processors) 3749 rstart - first row in new global matrix generated 3750 */ 3751 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3752 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3753 if (call == MAT_INITIAL_MATRIX) { 3754 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3755 ii = aij->i; 3756 jj = aij->j; 3757 3758 /* 3759 Determine the number of non-zeros in the diagonal and off-diagonal 3760 portions of the matrix in order to do correct preallocation 3761 */ 3762 3763 /* first get start and end of "diagonal" columns */ 3764 if (csize == PETSC_DECIDE) { 3765 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3766 if (mglobal == n) { /* square matrix */ 3767 nlocal = m; 3768 } else { 3769 nlocal = n/size + ((n % size) > rank); 3770 } 3771 } else { 3772 nlocal = csize; 3773 } 3774 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3775 rstart = rend - nlocal; 3776 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3777 3778 /* next, compute all the lengths */ 3779 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3780 olens = dlens + m; 3781 for (i=0; i<m; i++) { 3782 jend = ii[i+1] - ii[i]; 3783 olen = 0; 3784 dlen = 0; 3785 for (j=0; j<jend; j++) { 3786 if (*jj < rstart || *jj >= rend) olen++; 3787 else dlen++; 3788 jj++; 3789 } 3790 olens[i] = olen; 3791 dlens[i] = dlen; 3792 } 3793 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3794 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3795 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3796 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3797 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3798 ierr = PetscFree(dlens);CHKERRQ(ierr); 3799 } else { 3800 PetscInt ml,nl; 3801 3802 M = *newmat; 3803 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3804 
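/* the matrix being reused must keep the same number of local rows as the newly extracted sequential submatrix */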
if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3805 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3806 /* 3807 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3808 rather than the slower MatSetValues(). 3809 */ 3810 M->was_assembled = PETSC_TRUE; 3811 M->assembled = PETSC_FALSE; 3812 } 3813 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3814 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3815 ii = aij->i; 3816 jj = aij->j; 3817 3818 /* trigger copy to CPU if needed */ 3819 ierr = MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr); 3820 for (i=0; i<m; i++) { 3821 row = rstart + i; 3822 nz = ii[i+1] - ii[i]; 3823 cwork = jj; jj += nz; 3824 vwork = aa; aa += nz; 3825 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3826 } 3827 ierr = MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr); 3828 3829 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3830 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3831 *newmat = M; 3832 3833 /* save submatrix used in processor for next request */ 3834 if (call == MAT_INITIAL_MATRIX) { 3835 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3836 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3837 } 3838 PetscFunctionReturn(0); 3839 } 3840 3841 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3842 { 3843 PetscInt m,cstart, cend,j,nnz,i,d; 3844 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3845 const PetscInt *JJ; 3846 PetscErrorCode ierr; 3847 PetscBool nooffprocentries; 3848 3849 PetscFunctionBegin; 3850 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3851 3852 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3853 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3854 m = B->rmap->n; 3855 cstart = B->cmap->rstart; 3856 cend = B->cmap->rend; 3857 rstart = B->rmap->rstart; 3858 3859 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3860 3861 if (PetscDefined(USE_DEBUG)) { 3862 for (i=0; i<m; i++) { 3863 nnz = Ii[i+1]- Ii[i]; 3864 JJ = J + Ii[i]; 3865 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3866 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3867 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3868 } 3869 } 3870 3871 for (i=0; i<m; i++) { 3872 nnz = Ii[i+1]- Ii[i]; 3873 JJ = J + Ii[i]; 3874 nnz_max = PetscMax(nnz_max,nnz); 3875 d = 0; 3876 for (j=0; j<nnz; j++) { 3877 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3878 } 3879 d_nnz[i] = d; 3880 o_nnz[i] = nnz - d; 3881 } 3882 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3883 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3884 3885 for (i=0; i<m; i++) { 3886 ii = i + rstart; 3887 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? 
v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 3888 } 3889 nooffprocentries = B->nooffprocentries; 3890 B->nooffprocentries = PETSC_TRUE; 3891 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3892 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3893 B->nooffprocentries = nooffprocentries; 3894 3895 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3896 PetscFunctionReturn(0); 3897 } 3898 3899 /*@ 3900 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3901 (the default parallel PETSc format). 3902 3903 Collective 3904 3905 Input Parameters: 3906 + B - the matrix 3907 . i - the indices into j for the start of each local row (starts with zero) 3908 . j - the column indices for each local row (starts with zero) 3909 - v - optional values in the matrix 3910 3911 Level: developer 3912 3913 Notes: 3914 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3915 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3916 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3917 3918 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3919 3920 The format which is used for the sparse matrix input, is equivalent to a 3921 row-major ordering.. i.e for the following matrix, the input data expected is 3922 as shown 3923 3924 $ 1 0 0 3925 $ 2 0 3 P0 3926 $ ------- 3927 $ 4 5 6 P1 3928 $ 3929 $ Process0 [P0]: rows_owned=[0,1] 3930 $ i = {0,1,3} [size = nrow+1 = 2+1] 3931 $ j = {0,0,2} [size = 3] 3932 $ v = {1,2,3} [size = 3] 3933 $ 3934 $ Process1 [P1]: rows_owned=[2] 3935 $ i = {0,3} [size = nrow+1 = 1+1] 3936 $ j = {0,1,2} [size = 3] 3937 $ v = {4,5,6} [size = 3] 3938 3939 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3940 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3941 @*/ 3942 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3943 { 3944 PetscErrorCode ierr; 3945 3946 PetscFunctionBegin; 3947 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3948 PetscFunctionReturn(0); 3949 } 3950 3951 /*@C 3952 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3953 (the default parallel PETSc format). For good matrix assembly performance 3954 the user should preallocate the matrix storage by setting the parameters 3955 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3956 performance can be increased by more than a factor of 50. 3957 3958 Collective 3959 3960 Input Parameters: 3961 + B - the matrix 3962 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3963 (same value is used for all local rows) 3964 . d_nnz - array containing the number of nonzeros in the various rows of the 3965 DIAGONAL portion of the local submatrix (possibly different for each row) 3966 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3967 The size of this array is equal to the number of local rows, i.e 'm'. 3968 For matrices that will be factored, you must leave room for (and set) 3969 the diagonal entry even if it is zero. 3970 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3971 submatrix (same value is used for all local rows). 3972 - o_nnz - array containing the number of nonzeros in the various rows of the 3973 OFF-DIAGONAL portion of the local submatrix (possibly different for 3974 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3975 structure. The size of this array is equal to the number 3976 of local rows, i.e. 'm'. 3977 3978 If the *_nnz parameter is given then the *_nz parameter is ignored. 3979 3980 The AIJ format (also called the Yale sparse matrix format or 3981 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3982 storage. The stored row and column indices begin with zero. 3983 See Users-Manual: ch_mat for details. 3984 3985 The parallel matrix is partitioned such that the first m0 rows belong to 3986 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3987 to process 2, etc., where m0,m1,m2,... are the input parameter 'm'. 3988 3989 The DIAGONAL portion of the local submatrix of a processor can be defined 3990 as the submatrix which is obtained by extracting the part corresponding to 3991 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3992 first row that belongs to the processor, r2 is the last row belonging to 3993 this processor, and c1-c2 is the range of indices of the local part of a 3994 vector suitable for applying the matrix to. This is an mxn matrix. In the 3995 common case of a square matrix, the row and column ranges are the same and 3996 the DIAGONAL part is also square. The remaining portion of the local 3997 submatrix (mxN) constitutes the OFF-DIAGONAL portion. 3998 3999 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 4000 4001 You can call MatGetInfo() to get information on how effective the preallocation was; 4002 for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 4003 You can also run with the option -info and look for messages with the string 4004 malloc in them to see if additional memory allocation was needed. 4005 4006 Example usage: 4007 4008 Consider the following 8x8 matrix with 34 non-zero values, that is 4009 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4010 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4011 as follows: 4012 4013 .vb 4014 1 2 0 | 0 3 0 | 0 4 4015 Proc0 0 5 6 | 7 0 0 | 8 0 4016 9 0 10 | 11 0 0 | 12 0 4017 ------------------------------------- 4018 13 0 14 | 15 16 17 | 0 0 4019 Proc1 0 18 0 | 19 20 21 | 0 0 4020 0 0 0 | 22 23 0 | 24 0 4021 ------------------------------------- 4022 Proc2 25 26 27 | 0 0 28 | 29 0 4023 30 0 0 | 31 32 33 | 0 34 4024 .ve 4025 4026 This can be represented as a collection of submatrices as: 4027 4028 .vb 4029 A B C 4030 D E F 4031 G H I 4032 .ve 4033 4034 Where the submatrices A,B,C are owned by proc0, D,E,F are 4035 owned by proc1, G,H,I are owned by proc2. 4036 4037 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4038 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4039 The 'M','N' parameters are 8,8, and have the same values on all procs. 4040 4041 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4042 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4043 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4044 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4045 part as SeqAIJ matrices.
For example, proc1 will store [E] as one SeqAIJ 4046 matrix and [DF] as another SeqAIJ matrix. 4047 4048 When d_nz, o_nz parameters are specified, d_nz storage elements are 4049 allocated for every row of the local diagonal submatrix, and o_nz 4050 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4051 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 4052 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4053 In this case, the values of d_nz,o_nz are: 4054 .vb 4055 proc0 : dnz = 2, o_nz = 2 4056 proc1 : dnz = 3, o_nz = 2 4057 proc2 : dnz = 1, o_nz = 4 4058 .ve 4059 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4060 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4061 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4062 34 values. 4063 4064 When d_nnz, o_nnz parameters are specified, the storage is specified 4065 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4066 In the above case the values for d_nnz,o_nnz are: 4067 .vb 4068 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4069 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4070 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4071 .ve 4072 Here the space allocated is the sum of all the above values, i.e. 34, and 4073 hence the preallocation is perfect. 4074 4075 Level: intermediate 4076 4077 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4078 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 4079 @*/ 4080 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4081 { 4082 PetscErrorCode ierr; 4083 4084 PetscFunctionBegin; 4085 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4086 PetscValidType(B,1); 4087 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4088 PetscFunctionReturn(0); 4089 } 4090 4091 /*@ 4092 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard 4093 CSR format. 4094 4095 Collective 4096 4097 Input Parameters: 4098 + comm - MPI communicator 4099 . m - number of local rows (Cannot be PETSC_DECIDE) 4100 . n - This value should be the same as the local size used in creating the 4101 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4102 calculated if N is given) For square matrices n is almost always m. 4103 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4104 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4105 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4106 . j - column indices 4107 - a - matrix values 4108 4109 Output Parameter: 4110 . mat - the matrix 4111 4112 Level: intermediate 4113 4114 Notes: 4115 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4116 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4117 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4118 4119 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4120 4121 The format which is used for the sparse matrix input is equivalent to a 4122 row-major ordering,
i.e for the following matrix, the input data expected is 4123 as shown 4124 4125 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays 4126 4127 $ 1 0 0 4128 $ 2 0 3 P0 4129 $ ------- 4130 $ 4 5 6 P1 4131 $ 4132 $ Process0 [P0]: rows_owned=[0,1] 4133 $ i = {0,1,3} [size = nrow+1 = 2+1] 4134 $ j = {0,0,2} [size = 3] 4135 $ v = {1,2,3} [size = 3] 4136 $ 4137 $ Process1 [P1]: rows_owned=[2] 4138 $ i = {0,3} [size = nrow+1 = 1+1] 4139 $ j = {0,1,2} [size = 3] 4140 $ v = {4,5,6} [size = 3] 4141 4142 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4143 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4144 @*/ 4145 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4146 { 4147 PetscErrorCode ierr; 4148 4149 PetscFunctionBegin; 4150 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4151 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4152 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4153 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4154 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4155 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4156 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4157 PetscFunctionReturn(0); 4158 } 4159 4160 /*@ 4161 MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard 4162 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical 4163 4164 Collective 4165 4166 Input Parameters: 4167 + mat - the matrix 4168 . m - number of local rows (Cannot be PETSC_DECIDE) 4169 . n - This value should be the same as the local size used in creating the 4170 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4171 calculated if N is given) For square matrices n is almost always m. 4172 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4173 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4174 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4175 . 
J - column indices 4176 - v - matrix values 4177 4178 Level: intermediate 4179 4180 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4181 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4182 @*/ 4183 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4184 { 4185 PetscErrorCode ierr; 4186 PetscInt cstart,nnz,i,j; 4187 PetscInt *ld; 4188 PetscBool nooffprocentries; 4189 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4190 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data; 4191 PetscScalar *ad = Ad->a, *ao = Ao->a; 4192 const PetscInt *Adi = Ad->i; 4193 PetscInt ldi,Iii,md; 4194 4195 PetscFunctionBegin; 4196 if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4197 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4198 if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4199 if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4200 4201 cstart = mat->cmap->rstart; 4202 if (!Aij->ld) { 4203 /* count number of entries below block diagonal */ 4204 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 4205 Aij->ld = ld; 4206 for (i=0; i<m; i++) { 4207 nnz = Ii[i+1]- Ii[i]; 4208 j = 0; 4209 while (J[j] < cstart && j < nnz) {j++;} 4210 J += nnz; 4211 ld[i] = j; 4212 } 4213 } else { 4214 ld = Aij->ld; 4215 } 4216 4217 for (i=0; i<m; i++) { 4218 nnz = Ii[i+1]- Ii[i]; 4219 Iii = Ii[i]; 4220 ldi = ld[i]; 4221 md = Adi[i+1]-Adi[i]; 4222 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr); 4223 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr); 4224 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr); 4225 ad += md; 4226 ao += nnz - md; 4227 } 4228 nooffprocentries = mat->nooffprocentries; 4229 mat->nooffprocentries = PETSC_TRUE; 4230 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr); 4231 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr); 4232 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 4233 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4234 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4235 mat->nooffprocentries = nooffprocentries; 4236 PetscFunctionReturn(0); 4237 } 4238 4239 /*@C 4240 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4241 (the default parallel PETSc format). For good matrix assembly performance 4242 the user should preallocate the matrix storage by setting the parameters 4243 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4244 performance can be increased by more than a factor of 50. 4245 4246 Collective 4247 4248 Input Parameters: 4249 + comm - MPI communicator 4250 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4251 This value should be the same as the local size used in creating the 4252 y vector for the matrix-vector product y = Ax. 4253 . n - This value should be the same as the local size used in creating the 4254 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4255 calculated if N is given) For square matrices n is almost always m. 4256 . 
M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4257 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4258 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4259 (same value is used for all local rows) 4260 . d_nnz - array containing the number of nonzeros in the various rows of the 4261 DIAGONAL portion of the local submatrix (possibly different for each row) 4262 or NULL, if d_nz is used to specify the nonzero structure. 4263 The size of this array is equal to the number of local rows, i.e 'm'. 4264 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4265 submatrix (same value is used for all local rows). 4266 - o_nnz - array containing the number of nonzeros in the various rows of the 4267 OFF-DIAGONAL portion of the local submatrix (possibly different for 4268 each row) or NULL, if o_nz is used to specify the nonzero 4269 structure. The size of this array is equal to the number 4270 of local rows, i.e 'm'. 4271 4272 Output Parameter: 4273 . A - the matrix 4274 4275 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4276 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4277 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4278 4279 Notes: 4280 If the *_nnz parameter is given then the *_nz parameter is ignored 4281 4282 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4283 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4284 storage requirements for this matrix. 4285 4286 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4287 processor than it must be used on all processors that share the object for 4288 that argument. 4289 4290 The user MUST specify either the local or global matrix dimensions 4291 (possibly both). 4292 4293 The parallel matrix is partitioned across processors such that the 4294 first m0 rows belong to process 0, the next m1 rows belong to 4295 process 1, the next m2 rows belong to process 2 etc.. where 4296 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4297 values corresponding to [m x N] submatrix. 4298 4299 The columns are logically partitioned with the n0 columns belonging 4300 to 0th partition, the next n1 columns belonging to the next 4301 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4302 4303 The DIAGONAL portion of the local submatrix on any given processor 4304 is the submatrix corresponding to the rows and columns m,n 4305 corresponding to the given processor. i.e diagonal matrix on 4306 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4307 etc. The remaining portion of the local submatrix [m x (N-n)] 4308 constitute the OFF-DIAGONAL portion. The example below better 4309 illustrates this concept. 4310 4311 For a square global matrix we define each processor's diagonal portion 4312 to be its local rows and the corresponding columns (a square submatrix); 4313 each processor's off-diagonal portion encompasses the remainder of the 4314 local matrix (a rectangular submatrix). 4315 4316 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4317 4318 When calling this routine with a single process communicator, a matrix of 4319 type SEQAIJ is returned. 
If a matrix of type MPIAIJ is desired for this 4320 type of communicator, use the construction mechanism 4321 .vb 4322 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4323 .ve 4324 4330 By default, this format uses inodes (identical nodes) when possible. 4331 We search for consecutive rows with the same nonzero structure, thereby 4332 reusing matrix information to achieve increased efficiency. 4333 4334 Options Database Keys: 4335 + -mat_no_inode - Do not use inodes 4336 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4337 4338 4339 4340 Example usage: 4341 4342 Consider the following 8x8 matrix with 34 non-zero values that is 4343 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4344 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4345 as follows 4346 4347 .vb 4348 1 2 0 | 0 3 0 | 0 4 4349 Proc0 0 5 6 | 7 0 0 | 8 0 4350 9 0 10 | 11 0 0 | 12 0 4351 ------------------------------------- 4352 13 0 14 | 15 16 17 | 0 0 4353 Proc1 0 18 0 | 19 20 21 | 0 0 4354 0 0 0 | 22 23 0 | 24 0 4355 ------------------------------------- 4356 Proc2 25 26 27 | 0 0 28 | 29 0 4357 30 0 0 | 31 32 33 | 0 34 4358 .ve 4359 4360 This can be represented as a collection of submatrices as 4361 4362 .vb 4363 A B C 4364 D E F 4365 G H I 4366 .ve 4367 4368 Here the submatrices A,B,C are owned by proc0, D,E,F are 4369 owned by proc1, and G,H,I are owned by proc2. 4370 4371 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4372 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4373 The 'M','N' parameters are 8,8, and have the same values on all procs. 4374 4375 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4376 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4377 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4378 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 4379 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 4380 matrix, and [DF] as another SeqAIJ matrix. 4381 4382 When d_nz, o_nz parameters are specified, d_nz storage elements are 4383 allocated for every row of the local diagonal submatrix, and o_nz 4384 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4385 One way to choose d_nz and o_nz is to use the maximum number of nonzeros over the local 4386 rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively. 4387 In this case, the values of d_nz,o_nz are 4388 .vb 4389 proc0 : d_nz = 2, o_nz = 2 4390 proc1 : d_nz = 3, o_nz = 2 4391 proc2 : d_nz = 1, o_nz = 4 4392 .ve 4393 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4394 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4395 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4396 34 values. 4397 4398 When d_nnz, o_nnz parameters are specified, the storage is specified 4399 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4400 In the above case the values for d_nnz,o_nnz are 4401 .vb 4402 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4403 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4404 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4405 .ve 4406 Here the space allocated is the sum of all the above values, i.e., 34, and 4407 hence the preallocation is perfect.
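A minimal calling sketch for the example above, as seen from proc0 (illustrative only; the d_nnz/o_nnz values are the ones listed for proc0 in the table above, and error checking with CHKERRQ() is omitted):
.vb
      Mat      A;
      PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};  /* proc0 values from the table above */

      MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
      /* insert this process's rows with MatSetValues(), then assemble */
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve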
4408 4409 Level: intermediate 4410 4411 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4412 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4413 @*/ 4414 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4415 { 4416 PetscErrorCode ierr; 4417 PetscMPIInt size; 4418 4419 PetscFunctionBegin; 4420 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4421 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4422 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4423 if (size > 1) { 4424 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4425 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4426 } else { 4427 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4428 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4429 } 4430 PetscFunctionReturn(0); 4431 } 4432 4433 /*@C 4434 MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix 4435 4436 Not collective 4437 4438 Input Parameter: 4439 . A - The MPIAIJ matrix 4440 4441 Output Parameters: 4442 + Ad - The local diagonal block as a SeqAIJ matrix 4443 . Ao - The local off-diagonal block as a SeqAIJ matrix 4444 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4445 4446 Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4447 in Ad are in [0, Nc), where Nc is the number of local columns. The columns of Ao are in [0, Nco), where Nco is 4448 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4449 local column numbers to global column numbers in the original matrix.
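A minimal sketch of how the outputs relate to the parallel matrix (illustrative only; error checking omitted):
.vb
      Mat            Ad,Ao;
      const PetscInt *colmap;
      PetscInt       cstart;

      MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
      MatGetOwnershipRangeColumn(A,&cstart,NULL);
      /* local column j of Ao corresponds to global column colmap[j] of A,
         local column j of Ad corresponds to global column cstart + j     */
.ve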
4450 4451 Level: intermediate 4452 4453 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ 4454 @*/ 4455 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4456 { 4457 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4458 PetscBool flg; 4459 PetscErrorCode ierr; 4460 4461 PetscFunctionBegin; 4462 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4463 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4464 if (Ad) *Ad = a->A; 4465 if (Ao) *Ao = a->B; 4466 if (colmap) *colmap = a->garray; 4467 PetscFunctionReturn(0); 4468 } 4469 4470 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4471 { 4472 PetscErrorCode ierr; 4473 PetscInt m,N,i,rstart,nnz,Ii; 4474 PetscInt *indx; 4475 PetscScalar *values; 4476 4477 PetscFunctionBegin; 4478 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4479 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4480 PetscInt *dnz,*onz,sum,bs,cbs; 4481 4482 if (n == PETSC_DECIDE) { 4483 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4484 } 4485 /* Check sum(n) = N */ 4486 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4487 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4488 4489 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 4490 rstart -= m; 4491 4492 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4493 for (i=0; i<m; i++) { 4494 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4495 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4496 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4497 } 4498 4499 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4500 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4501 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4502 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4503 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4504 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4505 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4506 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4507 ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 4508 } 4509 4510 /* numeric phase */ 4511 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4512 for (i=0; i<m; i++) { 4513 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4514 Ii = i + rstart; 4515 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4516 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4517 } 4518 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4519 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4520 PetscFunctionReturn(0); 4521 } 4522 4523 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4524 { 4525 PetscErrorCode ierr; 4526 PetscMPIInt rank; 4527 PetscInt m,N,i,rstart,nnz; 4528 size_t len; 4529 const PetscInt *indx; 4530 PetscViewer out; 4531 char *name; 4532 Mat B; 4533 const PetscScalar *values; 4534 4535 PetscFunctionBegin; 4536 ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr); 4537 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 4538 /* Should this be the type of the diagonal block of A? 
*/ 4539 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4540 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4541 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4542 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4543 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4544 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 4545 for (i=0; i<m; i++) { 4546 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4547 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4548 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4549 } 4550 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4551 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4552 4553 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr); 4554 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4555 ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr); 4556 ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr); 4557 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4558 ierr = PetscFree(name);CHKERRQ(ierr); 4559 ierr = MatView(B,out);CHKERRQ(ierr); 4560 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4561 ierr = MatDestroy(&B);CHKERRQ(ierr); 4562 PetscFunctionReturn(0); 4563 } 4564 4565 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4566 { 4567 PetscErrorCode ierr; 4568 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4569 4570 PetscFunctionBegin; 4571 if (!merge) PetscFunctionReturn(0); 4572 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4573 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4574 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4575 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4576 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4577 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4578 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4579 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4580 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4581 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4582 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4583 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4584 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4585 ierr = PetscFree(merge);CHKERRQ(ierr); 4586 PetscFunctionReturn(0); 4587 } 4588 4589 #include <../src/mat/utils/freespace.h> 4590 #include <petscbt.h> 4591 4592 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4593 { 4594 PetscErrorCode ierr; 4595 MPI_Comm comm; 4596 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4597 PetscMPIInt size,rank,taga,*len_s; 4598 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4599 PetscInt proc,m; 4600 PetscInt **buf_ri,**buf_rj; 4601 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4602 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4603 MPI_Request *s_waits,*r_waits; 4604 MPI_Status *status; 4605 MatScalar *aa=a->a; 4606 MatScalar **abuf_r,*ba_i; 4607 Mat_Merge_SeqsToMPI *merge; 4608 PetscContainer container; 4609 4610 PetscFunctionBegin; 4611 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4612 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4613 4614 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4615 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 4616 4617 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4618 if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4619 ierr = 
PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4620 4621 bi = merge->bi; 4622 bj = merge->bj; 4623 buf_ri = merge->buf_ri; 4624 buf_rj = merge->buf_rj; 4625 4626 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4627 owners = merge->rowmap->range; 4628 len_s = merge->len_s; 4629 4630 /* send and recv matrix values */ 4631 /*-----------------------------*/ 4632 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4633 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4634 4635 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4636 for (proc=0,k=0; proc<size; proc++) { 4637 if (!len_s[proc]) continue; 4638 i = owners[proc]; 4639 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRMPI(ierr); 4640 k++; 4641 } 4642 4643 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRMPI(ierr);} 4644 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRMPI(ierr);} 4645 ierr = PetscFree(status);CHKERRQ(ierr); 4646 4647 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4648 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4649 4650 /* insert mat values of mpimat */ 4651 /*----------------------------*/ 4652 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4653 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4654 4655 for (k=0; k<merge->nrecv; k++) { 4656 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4657 nrows = *(buf_ri_k[k]); 4658 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4659 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4660 } 4661 4662 /* set values of ba */ 4663 m = merge->rowmap->n; 4664 for (i=0; i<m; i++) { 4665 arow = owners[rank] + i; 4666 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4667 bnzi = bi[i+1] - bi[i]; 4668 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4669 4670 /* add local non-zero vals of this proc's seqmat into ba */ 4671 anzi = ai[arow+1] - ai[arow]; 4672 aj = a->j + ai[arow]; 4673 aa = a->a + ai[arow]; 4674 nextaj = 0; 4675 for (j=0; nextaj<anzi; j++) { 4676 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4677 ba_i[j] += aa[nextaj++]; 4678 } 4679 } 4680 4681 /* add received vals into ba */ 4682 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4683 /* i-th row */ 4684 if (i == *nextrow[k]) { 4685 anzi = *(nextai[k]+1) - *nextai[k]; 4686 aj = buf_rj[k] + *(nextai[k]); 4687 aa = abuf_r[k] + *(nextai[k]); 4688 nextaj = 0; 4689 for (j=0; nextaj<anzi; j++) { 4690 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4691 ba_i[j] += aa[nextaj++]; 4692 } 4693 } 4694 nextrow[k]++; nextai[k]++; 4695 } 4696 } 4697 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4698 } 4699 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4700 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4701 4702 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4703 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4704 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4705 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4706 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4707 PetscFunctionReturn(0); 4708 } 4709 4710 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4711 { 4712 PetscErrorCode ierr; 4713 Mat B_mpi; 4714 Mat_SeqAIJ 
*a=(Mat_SeqAIJ*)seqmat->data; 4715 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4716 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4717 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4718 PetscInt len,proc,*dnz,*onz,bs,cbs; 4719 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4720 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4721 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4722 MPI_Status *status; 4723 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4724 PetscBT lnkbt; 4725 Mat_Merge_SeqsToMPI *merge; 4726 PetscContainer container; 4727 4728 PetscFunctionBegin; 4729 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4730 4731 /* make sure it is a PETSc comm */ 4732 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4733 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4734 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 4735 4736 ierr = PetscNew(&merge);CHKERRQ(ierr); 4737 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4738 4739 /* determine row ownership */ 4740 /*---------------------------------------------------------*/ 4741 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4742 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4743 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4744 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4745 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4746 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4747 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4748 4749 m = merge->rowmap->n; 4750 owners = merge->rowmap->range; 4751 4752 /* determine the number of messages to send, their lengths */ 4753 /*---------------------------------------------------------*/ 4754 len_s = merge->len_s; 4755 4756 len = 0; /* length of buf_si[] */ 4757 merge->nsend = 0; 4758 for (proc=0; proc<size; proc++) { 4759 len_si[proc] = 0; 4760 if (proc == rank) { 4761 len_s[proc] = 0; 4762 } else { 4763 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4764 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4765 } 4766 if (len_s[proc]) { 4767 merge->nsend++; 4768 nrows = 0; 4769 for (i=owners[proc]; i<owners[proc+1]; i++) { 4770 if (ai[i+1] > ai[i]) nrows++; 4771 } 4772 len_si[proc] = 2*(nrows+1); 4773 len += len_si[proc]; 4774 } 4775 } 4776 4777 /* determine the number and length of messages to receive for ij-structure */ 4778 /*-------------------------------------------------------------------------*/ 4779 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4780 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4781 4782 /* post the Irecv of j-structure */ 4783 /*-------------------------------*/ 4784 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4785 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4786 4787 /* post the Isend of j-structure */ 4788 /*--------------------------------*/ 4789 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4790 4791 for (proc=0, k=0; proc<size; proc++) { 4792 if (!len_s[proc]) continue; 4793 i = owners[proc]; 4794 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRMPI(ierr); 4795 k++; 4796 } 4797 4798 /* receives and sends of j-structure are complete */ 4799 
/*------------------------------------------------*/ 4800 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRMPI(ierr);} 4801 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRMPI(ierr);} 4802 4803 /* send and recv i-structure */ 4804 /*---------------------------*/ 4805 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4806 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4807 4808 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4809 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4810 for (proc=0,k=0; proc<size; proc++) { 4811 if (!len_s[proc]) continue; 4812 /* form outgoing message for i-structure: 4813 buf_si[0]: nrows to be sent 4814 [1:nrows]: row index (global) 4815 [nrows+1:2*nrows+1]: i-structure index 4816 */ 4817 /*-------------------------------------------*/ 4818 nrows = len_si[proc]/2 - 1; 4819 buf_si_i = buf_si + nrows+1; 4820 buf_si[0] = nrows; 4821 buf_si_i[0] = 0; 4822 nrows = 0; 4823 for (i=owners[proc]; i<owners[proc+1]; i++) { 4824 anzi = ai[i+1] - ai[i]; 4825 if (anzi) { 4826 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4827 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4828 nrows++; 4829 } 4830 } 4831 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRMPI(ierr); 4832 k++; 4833 buf_si += len_si[proc]; 4834 } 4835 4836 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRMPI(ierr);} 4837 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRMPI(ierr);} 4838 4839 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4840 for (i=0; i<merge->nrecv; i++) { 4841 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4842 } 4843 4844 ierr = PetscFree(len_si);CHKERRQ(ierr); 4845 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4846 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4847 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4848 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4849 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4850 ierr = PetscFree(status);CHKERRQ(ierr); 4851 4852 /* compute a local seq matrix in each processor */ 4853 /*----------------------------------------------*/ 4854 /* allocate bi array and free space for accumulating nonzero column info */ 4855 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4856 bi[0] = 0; 4857 4858 /* create and initialize a linked list */ 4859 nlnk = N+1; 4860 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4861 4862 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4863 len = ai[owners[rank+1]] - ai[owners[rank]]; 4864 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4865 4866 current_space = free_space; 4867 4868 /* determine symbolic info for each local row */ 4869 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4870 4871 for (k=0; k<merge->nrecv; k++) { 4872 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4873 nrows = *buf_ri_k[k]; 4874 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4875 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4876 } 4877 4878 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4879 len = 0; 4880 for (i=0; i<m; i++) { 4881 bnzi = 0; 4882 /* add local non-zero cols of this proc's 
seqmat into lnk */ 4883 arow = owners[rank] + i; 4884 anzi = ai[arow+1] - ai[arow]; 4885 aj = a->j + ai[arow]; 4886 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4887 bnzi += nlnk; 4888 /* add received col data into lnk */ 4889 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4890 if (i == *nextrow[k]) { /* i-th row */ 4891 anzi = *(nextai[k]+1) - *nextai[k]; 4892 aj = buf_rj[k] + *nextai[k]; 4893 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4894 bnzi += nlnk; 4895 nextrow[k]++; nextai[k]++; 4896 } 4897 } 4898 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4899 4900 /* if free space is not available, make more free space */ 4901 if (current_space->local_remaining<bnzi) { 4902 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr); 4903 nspacedouble++; 4904 } 4905 /* copy data into free space, then initialize lnk */ 4906 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4907 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4908 4909 current_space->array += bnzi; 4910 current_space->local_used += bnzi; 4911 current_space->local_remaining -= bnzi; 4912 4913 bi[i+1] = bi[i] + bnzi; 4914 } 4915 4916 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4917 4918 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4919 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4920 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4921 4922 /* create symbolic parallel matrix B_mpi */ 4923 /*---------------------------------------*/ 4924 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4925 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4926 if (n==PETSC_DECIDE) { 4927 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4928 } else { 4929 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4930 } 4931 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4932 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4933 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4934 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4935 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4936 4937 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4938 B_mpi->assembled = PETSC_FALSE; 4939 merge->bi = bi; 4940 merge->bj = bj; 4941 merge->buf_ri = buf_ri; 4942 merge->buf_rj = buf_rj; 4943 merge->coi = NULL; 4944 merge->coj = NULL; 4945 merge->owners_co = NULL; 4946 4947 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4948 4949 /* attach the supporting struct to B_mpi for reuse */ 4950 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4951 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4952 ierr = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr); 4953 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4954 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4955 *mpimat = B_mpi; 4956 4957 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4958 PetscFunctionReturn(0); 4959 } 4960 4961 /*@C 4962 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4963 matrices from each processor 4964 4965 Collective 4966 4967 Input Parameters: 4968 + comm - the communicator the parallel matrix will live on 4969 . seqmat - the input sequential matrices 4970 .
m - number of local rows (or PETSC_DECIDE) 4971 . n - number of local columns (or PETSC_DECIDE) 4972 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4973 4974 Output Parameter: 4975 . mpimat - the parallel matrix generated 4976 4977 Level: advanced 4978 4979 Notes: 4980 The dimensions of the sequential matrix in each processor MUST be the same. 4981 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 4982 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4983 @*/ 4984 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4985 { 4986 PetscErrorCode ierr; 4987 PetscMPIInt size; 4988 4989 PetscFunctionBegin; 4990 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4991 if (size == 1) { 4992 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4993 if (scall == MAT_INITIAL_MATRIX) { 4994 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4995 } else { 4996 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4997 } 4998 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4999 PetscFunctionReturn(0); 5000 } 5001 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5002 if (scall == MAT_INITIAL_MATRIX) { 5003 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5004 } 5005 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5006 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5007 PetscFunctionReturn(0); 5008 } 5009 5010 /*@ 5011 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5012 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5013 with MatGetSize(). 5014 5015 Not Collective 5016 5017 Input Parameters: 5018 + A - the matrix 5019 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5020 5021 Output Parameter: 5022 . A_loc - the local sequential matrix generated 5023 5024 Level: developer 5025 5026 Notes: 5027 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 5028 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 5029 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 5030 modify the values of the returned A_loc.
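A minimal reuse sketch (illustrative only; error checking omitted):
.vb
      Mat A_loc;

      MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);  /* build the local matrix once       */
      /* ... the values of A change, but not its nonzero pattern ...                           */
      MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);    /* refresh the values of A_loc       */
      MatDestroy(&A_loc);
.ve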
5031 5032 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge() 5033 @*/ 5034 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5035 { 5036 PetscErrorCode ierr; 5037 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5038 Mat_SeqAIJ *mat,*a,*b; 5039 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5040 const PetscScalar *aa,*ba,*aav,*bav; 5041 PetscScalar *ca,*cam; 5042 PetscMPIInt size; 5043 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5044 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5045 PetscBool match; 5046 5047 PetscFunctionBegin; 5048 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5049 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5050 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 5051 if (size == 1) { 5052 if (scall == MAT_INITIAL_MATRIX) { 5053 ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr); 5054 *A_loc = mpimat->A; 5055 } else if (scall == MAT_REUSE_MATRIX) { 5056 ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5057 } 5058 PetscFunctionReturn(0); 5059 } 5060 5061 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5062 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5063 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5064 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5065 ierr = MatSeqAIJGetArrayRead(mpimat->A,&aav);CHKERRQ(ierr); 5066 ierr = MatSeqAIJGetArrayRead(mpimat->B,&bav);CHKERRQ(ierr); 5067 aa = aav; 5068 ba = bav; 5069 if (scall == MAT_INITIAL_MATRIX) { 5070 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5071 ci[0] = 0; 5072 for (i=0; i<am; i++) { 5073 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5074 } 5075 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5076 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5077 k = 0; 5078 for (i=0; i<am; i++) { 5079 ncols_o = bi[i+1] - bi[i]; 5080 ncols_d = ai[i+1] - ai[i]; 5081 /* off-diagonal portion of A */ 5082 for (jo=0; jo<ncols_o; jo++) { 5083 col = cmap[*bj]; 5084 if (col >= cstart) break; 5085 cj[k] = col; bj++; 5086 ca[k++] = *ba++; 5087 } 5088 /* diagonal portion of A */ 5089 for (j=0; j<ncols_d; j++) { 5090 cj[k] = cstart + *aj++; 5091 ca[k++] = *aa++; 5092 } 5093 /* off-diagonal portion of A */ 5094 for (j=jo; j<ncols_o; j++) { 5095 cj[k] = cmap[*bj++]; 5096 ca[k++] = *ba++; 5097 } 5098 } 5099 /* put together the new matrix */ 5100 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5101 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5102 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5103 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5104 mat->free_a = PETSC_TRUE; 5105 mat->free_ij = PETSC_TRUE; 5106 mat->nonew = 0; 5107 } else if (scall == MAT_REUSE_MATRIX) { 5108 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5109 #if defined(PETSC_HAVE_DEVICE) 5110 (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU; 5111 #endif 5112 ci = mat->i; cj = mat->j; cam = mat->a; 5113 for (i=0; i<am; i++) { 5114 /* off-diagonal portion of A */ 5115 ncols_o = bi[i+1] - bi[i]; 5116 for (jo=0; jo<ncols_o; jo++) { 5117 col = cmap[*bj]; 5118 if (col >= cstart) break; 5119 *cam++ = *ba++; bj++; 5120 } 5121 /* diagonal portion of A */ 5122 ncols_d = ai[i+1] - ai[i]; 5123 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5124 /* off-diagonal portion of A */ 5125 for (j=jo; j<ncols_o; j++) { 5126 *cam++ = *ba++; bj++; 5127 } 5128 } 5129 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5130 ierr = MatSeqAIJRestoreArrayRead(mpimat->A,&aav);CHKERRQ(ierr); 5131 ierr = MatSeqAIJRestoreArrayRead(mpimat->B,&bav);CHKERRQ(ierr); 5132 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5133 PetscFunctionReturn(0); 5134 } 5135 5136 /*@ 5137 MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5138 mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and offdiagonal parts 5139 5140 Not Collective 5141 5142 Input Parameters: 5143 + A - the matrix 5144 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5145 5146 Output Parameters: 5147 + glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5148 - A_loc - the local sequential matrix generated 5149 5150 Level: developer 5151 5152 Notes: 5153 This is different from MatMPIAIJGetLocalMat() since the first columns in the returned matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering) 5154 5155 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed() 5156 5157 @*/ 5158 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc) 5159 { 5160 PetscErrorCode ierr; 5161 Mat Ao,Ad; 5162 const PetscInt *cmap; 5163 PetscMPIInt size; 5164 PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*); 5165 5166 PetscFunctionBegin; 5167 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap);CHKERRQ(ierr); 5168 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 5169 if (size == 1) { 5170 if (scall == MAT_INITIAL_MATRIX) { 5171 ierr = PetscObjectReference((PetscObject)Ad);CHKERRQ(ierr); 5172 *A_loc = Ad; 5173 } else if (scall == MAT_REUSE_MATRIX) { 5174 ierr = MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5175 } 5176 if (glob) { ierr = ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob);CHKERRQ(ierr); } 5177 PetscFunctionReturn(0); 5178 } 5179 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f);CHKERRQ(ierr); 5180 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5181 if (f) { 5182 ierr = (*f)(A,scall,glob,A_loc);CHKERRQ(ierr); 5183 } else { 5184 Mat_SeqAIJ *a = (Mat_SeqAIJ*)Ad->data; 5185 Mat_SeqAIJ *b = (Mat_SeqAIJ*)Ao->data; 5186 Mat_SeqAIJ *c; 5187 PetscInt *ai = a->i, *aj = a->j; 5188 PetscInt *bi = b->i, *bj = b->j; 5189 PetscInt *ci,*cj; 5190 const PetscScalar *aa,*ba; 5191 PetscScalar *ca; 5192 PetscInt i,j,am,dn,on; 5193 5194 ierr =
MatGetLocalSize(Ad,&am,&dn);CHKERRQ(ierr); 5195 ierr = MatGetLocalSize(Ao,NULL,&on);CHKERRQ(ierr); 5196 ierr = MatSeqAIJGetArrayRead(Ad,&aa);CHKERRQ(ierr); 5197 ierr = MatSeqAIJGetArrayRead(Ao,&ba);CHKERRQ(ierr); 5198 if (scall == MAT_INITIAL_MATRIX) { 5199 PetscInt k; 5200 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5201 ierr = PetscMalloc1(ai[am]+bi[am],&cj);CHKERRQ(ierr); 5202 ierr = PetscMalloc1(ai[am]+bi[am],&ca);CHKERRQ(ierr); 5203 ci[0] = 0; 5204 for (i=0,k=0; i<am; i++) { 5205 const PetscInt ncols_o = bi[i+1] - bi[i]; 5206 const PetscInt ncols_d = ai[i+1] - ai[i]; 5207 ci[i+1] = ci[i] + ncols_o + ncols_d; 5208 /* diagonal portion of A */ 5209 for (j=0; j<ncols_d; j++,k++) { 5210 cj[k] = *aj++; 5211 ca[k] = *aa++; 5212 } 5213 /* off-diagonal portion of A */ 5214 for (j=0; j<ncols_o; j++,k++) { 5215 cj[k] = dn + *bj++; 5216 ca[k] = *ba++; 5217 } 5218 } 5219 /* put together the new matrix */ 5220 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc);CHKERRQ(ierr); 5221 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5222 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5223 c = (Mat_SeqAIJ*)(*A_loc)->data; 5224 c->free_a = PETSC_TRUE; 5225 c->free_ij = PETSC_TRUE; 5226 c->nonew = 0; 5227 ierr = MatSetType(*A_loc,((PetscObject)Ad)->type_name);CHKERRQ(ierr); 5228 } else if (scall == MAT_REUSE_MATRIX) { 5229 #if defined(PETSC_HAVE_DEVICE) 5230 (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU; 5231 #endif 5232 c = (Mat_SeqAIJ*)(*A_loc)->data; 5233 ca = c->a; 5234 for (i=0; i<am; i++) { 5235 const PetscInt ncols_d = ai[i+1] - ai[i]; 5236 const PetscInt ncols_o = bi[i+1] - bi[i]; 5237 /* diagonal portion of A */ 5238 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 5239 /* off-diagonal portion of A */ 5240 for (j=0; j<ncols_o; j++) *ca++ = *ba++; 5241 } 5242 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5243 ierr = MatSeqAIJRestoreArrayRead(Ad,&aa);CHKERRQ(ierr); 5244 ierr = MatSeqAIJRestoreArrayRead(Ao,&ba);CHKERRQ(ierr); 5245 if (glob) { 5246 PetscInt cst, *gidx; 5247 5248 ierr = MatGetOwnershipRangeColumn(A,&cst,NULL);CHKERRQ(ierr); 5249 ierr = PetscMalloc1(dn+on,&gidx);CHKERRQ(ierr); 5250 for (i=0; i<dn; i++) gidx[i] = cst + i; 5251 for (i=0; i<on; i++) gidx[i+dn] = cmap[i]; 5252 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob);CHKERRQ(ierr); 5253 } 5254 } 5255 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5256 PetscFunctionReturn(0); 5257 } 5258 5259 /*@C 5260 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5261 5262 Not Collective 5263 5264 Input Parameters: 5265 + A - the matrix 5266 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5267 - row, col - index sets of rows and columns to extract (or NULL) 5268 5269 Output Parameter: 5270 .
A_loc - the local sequential matrix generated 5271 5272 Level: developer 5273 5274 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5275 5276 @*/ 5277 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5278 { 5279 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5280 PetscErrorCode ierr; 5281 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5282 IS isrowa,iscola; 5283 Mat *aloc; 5284 PetscBool match; 5285 5286 PetscFunctionBegin; 5287 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5288 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5289 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5290 if (!row) { 5291 start = A->rmap->rstart; end = A->rmap->rend; 5292 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5293 } else { 5294 isrowa = *row; 5295 } 5296 if (!col) { 5297 start = A->cmap->rstart; 5298 cmap = a->garray; 5299 nzA = a->A->cmap->n; 5300 nzB = a->B->cmap->n; 5301 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5302 ncols = 0; 5303 for (i=0; i<nzB; i++) { 5304 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5305 else break; 5306 } 5307 imark = i; 5308 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5309 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5310 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5311 } else { 5312 iscola = *col; 5313 } 5314 if (scall != MAT_INITIAL_MATRIX) { 5315 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5316 aloc[0] = *A_loc; 5317 } 5318 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5319 if (!col) { /* attach global id of condensed columns */ 5320 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5321 } 5322 *A_loc = aloc[0]; 5323 ierr = PetscFree(aloc);CHKERRQ(ierr); 5324 if (!row) { 5325 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5326 } 5327 if (!col) { 5328 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5329 } 5330 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5331 PetscFunctionReturn(0); 5332 } 5333 5334 /* 5335 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5336 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5337 * on a global size. 
5338 * */ 5339 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5340 { 5341 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5342 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5343 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5344 PetscMPIInt owner; 5345 PetscSFNode *iremote,*oiremote; 5346 const PetscInt *lrowindices; 5347 PetscErrorCode ierr; 5348 PetscSF sf,osf; 5349 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5350 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5351 MPI_Comm comm; 5352 ISLocalToGlobalMapping mapping; 5353 5354 PetscFunctionBegin; 5355 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr); 5356 /* plocalsize is the number of roots 5357 * nrows is the number of leaves 5358 * */ 5359 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr); 5360 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr); 5361 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr); 5362 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr); 5363 for (i=0;i<nrows;i++) { 5364 /* Find a remote index and an owner for a row 5365 * The row could be local or remote 5366 * */ 5367 owner = 0; 5368 lidx = 0; 5369 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr); 5370 iremote[i].index = lidx; 5371 iremote[i].rank = owner; 5372 } 5373 /* Create SF to communicate how many nonzero columns for each row */ 5374 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5375 /* SF will figure out the number of nonzero columns for each row, and their 5376 * offsets 5377 * */ 5378 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5379 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5380 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5381 5382 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); 5383 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); 5384 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); 5385 roffsets[0] = 0; 5386 roffsets[1] = 0; 5387 for (i=0;i<plocalsize;i++) { 5388 /* diag */ 5389 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5390 /* off diag */ 5391 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5392 /* compute offsets so that we know the relative location of each row's entries */ 5393 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5394 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5395 } 5396 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr); 5397 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr); 5398 /* 'r' means root, and 'l' means leaf */ 5399 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr); 5400 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr); 5401 ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr); 5402 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr); 5403 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5404 ierr = PetscFree(roffsets);CHKERRQ(ierr); 5405 ierr = PetscFree(nrcols);CHKERRQ(ierr); 5406 dntotalcols = 0; 5407 ontotalcols = 0; 5408 ncol = 0; 5409 for (i=0;i<nrows;i++) { 5410 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5411 ncol = PetscMax(pnnz[i],ncol); 5412 /* diag */ 5413 dntotalcols += nlcols[i*2+0]; 5414 /* off diag */ 5415 ontotalcols += nlcols[i*2+1]; 5416 } 5417 /* We do not need to figure out the right number of columns 5418 * since all the calculations will be done by going through the raw data 5419 * */ 5420 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr); 5421 ierr =
MatSetUp(*P_oth);CHKERRQ(ierr); 5422 ierr = PetscFree(pnnz);CHKERRQ(ierr); 5423 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5424 /* diag */ 5425 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr); 5426 /* off diag */ 5427 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr); 5428 /* diag */ 5429 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr); 5430 /* off diag */ 5431 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr); 5432 dntotalcols = 0; 5433 ontotalcols = 0; 5434 ntotalcols = 0; 5435 for (i=0;i<nrows;i++) { 5436 owner = 0; 5437 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr); 5438 /* Set iremote for diag matrix */ 5439 for (j=0;j<nlcols[i*2+0];j++) { 5440 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5441 iremote[dntotalcols].rank = owner; 5442 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5443 ilocal[dntotalcols++] = ntotalcols++; 5444 } 5445 /* off diag */ 5446 for (j=0;j<nlcols[i*2+1];j++) { 5447 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5448 oiremote[ontotalcols].rank = owner; 5449 oilocal[ontotalcols++] = ntotalcols++; 5450 } 5451 } 5452 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr); 5453 ierr = PetscFree(loffsets);CHKERRQ(ierr); 5454 ierr = PetscFree(nlcols);CHKERRQ(ierr); 5455 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5456 /* P serves as roots and P_oth is leaves 5457 * Diag matrix 5458 * */ 5459 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5460 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5461 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5462 5463 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr); 5464 /* Off diag */ 5465 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5466 ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr); 5467 ierr = PetscSFSetUp(osf);CHKERRQ(ierr); 5468 /* We operate on the matrix internal data for saving memory */ 5469 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5470 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5471 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr); 5472 /* Convert to global indices for diag matrix */ 5473 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5474 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5475 /* We want P_oth store global indices */ 5476 ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr); 5477 /* Use memory scalable approach */ 5478 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr); 5479 ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); 5480 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5481 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5482 /* Convert back to local indices */ 5483 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5484 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5485 nout = 0; 5486 ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr); 5487 if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout); 5488 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 
5489 /* Exchange values */ 5490 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5491 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5492 /* Stop PETSc from shrinking memory */ 5493 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5494 ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5495 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5496 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5497 ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5498 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5499 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5500 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5501 PetscFunctionReturn(0); 5502 } 5503 5504 /* 5505 * Creates a SeqAIJ matrix by taking the rows of P that correspond to the nonzero columns of the local A. 5506 * This supports MPIAIJ and MAIJ 5507 * */ 5508 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5509 { 5510 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5511 Mat_SeqAIJ *p_oth; 5512 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 5513 IS rows,map; 5514 PetscHMapI hamp; 5515 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5516 MPI_Comm comm; 5517 PetscSF sf,osf; 5518 PetscBool has; 5519 PetscErrorCode ierr; 5520 5521 PetscFunctionBegin; 5522 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5523 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5524 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5525 * and then create a submatrix (that often is an overlapping matrix) 5526 * */ 5527 if (reuse == MAT_INITIAL_MATRIX) { 5528 /* Use a hash table to figure out unique keys */ 5529 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5530 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5531 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5532 count = 0; 5533 /* Assume that a->garray is sorted, otherwise the following does not make sense */ 5534 for (i=0;i<a->B->cmap->n;i++) { 5535 key = a->garray[i]/dof; 5536 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5537 if (!has) { 5538 mapping[i] = count; 5539 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5540 } else { 5541 /* Current 'i' has the same key as the previous step */ 5542 mapping[i] = count-1; 5543 } 5544 } 5545 ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5546 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5547 if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count); 5548 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5549 off = 0; 5550 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5551 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5552 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5553 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5554 /* In case the matrix was already created and the user wants to recreate it */ 5555 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5556 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5557 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5558 ierr = ISDestroy(&map);CHKERRQ(ierr); 5559 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5560 }
else if (reuse == MAT_REUSE_MATRIX) { 5561 /* If matrix was already created, we simply update values using SF objects 5562 * that were attached to the matrix earlier. 5563 * */ 5564 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5565 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5566 if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet"); 5567 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5568 /* Update values in place */ 5569 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5570 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5571 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5572 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5573 } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type"); 5574 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5575 PetscFunctionReturn(0); 5576 } 5577 5578 /*@C 5579 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local A 5580 5581 Collective on Mat 5582 5583 Input Parameters: 5584 + A,B - the matrices in mpiaij format 5585 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5586 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5587 5588 Output Parameters: 5589 + rowb, colb - index sets of rows and columns of B to extract 5590 - B_seq - the sequential matrix generated 5591 5592 Level: developer 5593 5594 @*/ 5595 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5596 { 5597 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5598 PetscErrorCode ierr; 5599 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5600 IS isrowb,iscolb; 5601 Mat *bseq=NULL; 5602 5603 PetscFunctionBegin; 5604 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5605 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5606 } 5607 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5608 5609 if (scall == MAT_INITIAL_MATRIX) { 5610 start = A->cmap->rstart; 5611 cmap = a->garray; 5612 nzA = a->A->cmap->n; 5613 nzB = a->B->cmap->n; 5614 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5615 ncols = 0; 5616 for (i=0; i<nzB; i++) { /* row < local row index */ 5617 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5618 else break; 5619 } 5620 imark = i; 5621 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5622 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5623 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5624 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5625 } else { 5626 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5627 isrowb = *rowb; iscolb = *colb; 5628 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5629 bseq[0] = *B_seq; 5630 } 5631 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5632 *B_seq = bseq[0]; 5633 ierr = PetscFree(bseq);CHKERRQ(ierr); 5634 if (!rowb) { 5635 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5636 } else { 5637 *rowb = isrowb; 5638 } 5639 if (!colb) { 5640 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5641 } else { 5642 *colb = iscolb; 5643 }
5644 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5645 PetscFunctionReturn(0); 5646 } 5647 5648 /* 5649 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5650 of the OFF-DIAGONAL portion of local A 5651 5652 Collective on Mat 5653 5654 Input Parameters: 5655 + A,B - the matrices in mpiaij format 5656 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5657 5658 Output Parameter: 5659 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5660 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5661 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5662 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5663 5664 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5665 for this matrix. This is not desirable.. 5666 5667 Level: developer 5668 5669 */ 5670 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5671 { 5672 PetscErrorCode ierr; 5673 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5674 Mat_SeqAIJ *b_oth; 5675 VecScatter ctx; 5676 MPI_Comm comm; 5677 const PetscMPIInt *rprocs,*sprocs; 5678 const PetscInt *srow,*rstarts,*sstarts; 5679 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5680 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5681 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5682 MPI_Request *rwaits = NULL,*swaits = NULL; 5683 MPI_Status rstatus; 5684 PetscMPIInt size,tag,rank,nsends_mpi,nrecvs_mpi; 5685 PETSC_UNUSED PetscMPIInt jj; 5686 5687 PetscFunctionBegin; 5688 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5689 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 5690 5691 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5692 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5693 } 5694 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5695 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 5696 5697 if (size == 1) { 5698 startsj_s = NULL; 5699 bufa_ptr = NULL; 5700 *B_oth = NULL; 5701 PetscFunctionReturn(0); 5702 } 5703 5704 ctx = a->Mvctx; 5705 tag = ((PetscObject)ctx)->tag; 5706 5707 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5708 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5709 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5710 ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5711 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5712 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5713 5714 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5715 if (scall == MAT_INITIAL_MATRIX) { 5716 /* i-array */ 5717 /*---------*/ 5718 /* post receives */ 5719 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5720 for (i=0; i<nrecvs; i++) { 5721 rowlen = rvalues + rstarts[i]*rbs; 5722 nrows 
= (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5723 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5724 } 5725 5726 /* pack the outgoing message */ 5727 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5728 5729 sstartsj[0] = 0; 5730 rstartsj[0] = 0; 5731 len = 0; /* total length of j or a array to be sent */ 5732 if (nsends) { 5733 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5734 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5735 } 5736 for (i=0; i<nsends; i++) { 5737 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5738 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5739 for (j=0; j<nrows; j++) { 5740 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5741 for (l=0; l<sbs; l++) { 5742 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5743 5744 rowlen[j*sbs+l] = ncols; 5745 5746 len += ncols; 5747 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5748 } 5749 k++; 5750 } 5751 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5752 5753 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5754 } 5755 /* recvs and sends of i-array are completed */ 5756 i = nrecvs; 5757 while (i--) { 5758 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr); 5759 } 5760 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5761 ierr = PetscFree(svalues);CHKERRQ(ierr); 5762 5763 /* allocate buffers for sending j and a arrays */ 5764 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5765 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5766 5767 /* create i-array of B_oth */ 5768 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5769 5770 b_othi[0] = 0; 5771 len = 0; /* total length of j or a array to be received */ 5772 k = 0; 5773 for (i=0; i<nrecvs; i++) { 5774 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5775 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5776 for (j=0; j<nrows; j++) { 5777 b_othi[k+1] = b_othi[k] + rowlen[j]; 5778 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5779 k++; 5780 } 5781 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5782 } 5783 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5784 5785 /* allocate space for j and a arrrays of B_oth */ 5786 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5787 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5788 5789 /* j-array */ 5790 /*---------*/ 5791 /* post receives of j-array */ 5792 for (i=0; i<nrecvs; i++) { 5793 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5794 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5795 } 5796 5797 /* pack the outgoing message j-array */ 5798 if (nsends) k = sstarts[0]; 5799 for (i=0; i<nsends; i++) { 5800 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5801 bufJ = bufj+sstartsj[i]; 5802 for (j=0; j<nrows; j++) { 5803 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5804 for (ll=0; ll<sbs; ll++) { 5805 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5806 for (l=0; l<ncols; l++) { 5807 *bufJ++ = cols[l]; 5808 } 5809 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5810 } 5811 } 5812 ierr = 
MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5813 } 5814 5815 /* recvs and sends of j-array are completed */ 5816 i = nrecvs; 5817 while (i--) { 5818 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr); 5819 } 5820 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5821 } else if (scall == MAT_REUSE_MATRIX) { 5822 sstartsj = *startsj_s; 5823 rstartsj = *startsj_r; 5824 bufa = *bufa_ptr; 5825 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5826 b_otha = b_oth->a; 5827 #if defined(PETSC_HAVE_DEVICE) 5828 (*B_oth)->offloadmask = PETSC_OFFLOAD_CPU; 5829 #endif 5830 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5831 5832 /* a-array */ 5833 /*---------*/ 5834 /* post receives of a-array */ 5835 for (i=0; i<nrecvs; i++) { 5836 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5837 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5838 } 5839 5840 /* pack the outgoing message a-array */ 5841 if (nsends) k = sstarts[0]; 5842 for (i=0; i<nsends; i++) { 5843 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5844 bufA = bufa+sstartsj[i]; 5845 for (j=0; j<nrows; j++) { 5846 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5847 for (ll=0; ll<sbs; ll++) { 5848 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5849 for (l=0; l<ncols; l++) { 5850 *bufA++ = vals[l]; 5851 } 5852 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5853 } 5854 } 5855 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5856 } 5857 /* recvs and sends of a-array are completed */ 5858 i = nrecvs; 5859 while (i--) { 5860 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr); 5861 } 5862 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5863 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5864 5865 if (scall == MAT_INITIAL_MATRIX) { 5866 /* put together the new matrix */ 5867 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5868 5869 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5870 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5871 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5872 b_oth->free_a = PETSC_TRUE; 5873 b_oth->free_ij = PETSC_TRUE; 5874 b_oth->nonew = 0; 5875 5876 ierr = PetscFree(bufj);CHKERRQ(ierr); 5877 if (!startsj_s || !bufa_ptr) { 5878 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5879 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5880 } else { 5881 *startsj_s = sstartsj; 5882 *startsj_r = rstartsj; 5883 *bufa_ptr = bufa; 5884 } 5885 } 5886 5887 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5888 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5889 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5890 PetscFunctionReturn(0); 5891 } 5892 5893 /*@C 5894 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5895 5896 Not Collective 5897 5898 Input Parameters: 5899 . 
A - The matrix in mpiaij format 5900 5901 Output Parameter: 5902 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5903 . colmap - A map from global column index to local index into lvec 5904 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5905 5906 Level: developer 5907 5908 @*/ 5909 #if defined(PETSC_USE_CTABLE) 5910 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5911 #else 5912 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5913 #endif 5914 { 5915 Mat_MPIAIJ *a; 5916 5917 PetscFunctionBegin; 5918 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5919 PetscValidPointer(lvec, 2); 5920 PetscValidPointer(colmap, 3); 5921 PetscValidPointer(multScatter, 4); 5922 a = (Mat_MPIAIJ*) A->data; 5923 if (lvec) *lvec = a->lvec; 5924 if (colmap) *colmap = a->colmap; 5925 if (multScatter) *multScatter = a->Mvctx; 5926 PetscFunctionReturn(0); 5927 } 5928 5929 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5930 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5931 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5932 #if defined(PETSC_HAVE_MKL_SPARSE) 5933 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5934 #endif 5935 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5936 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5937 #if defined(PETSC_HAVE_ELEMENTAL) 5938 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5939 #endif 5940 #if defined(PETSC_HAVE_SCALAPACK) 5941 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 5942 #endif 5943 #if defined(PETSC_HAVE_HYPRE) 5944 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5945 #endif 5946 #if defined(PETSC_HAVE_CUDA) 5947 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 5948 #endif 5949 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 5950 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*); 5951 #endif 5952 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5953 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5954 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 5955 5956 /* 5957 Computes (B'*A')' since computing B*A directly is untenable 5958 5959 n p p 5960 [ ] [ ] [ ] 5961 m [ A ] * n [ B ] = m [ C ] 5962 [ ] [ ] [ ] 5963 5964 */ 5965 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5966 { 5967 PetscErrorCode ierr; 5968 Mat At,Bt,Ct; 5969 5970 PetscFunctionBegin; 5971 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5972 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5973 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr); 5974 ierr = MatDestroy(&At);CHKERRQ(ierr); 5975 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5976 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5977 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5978 PetscFunctionReturn(0); 5979 } 5980 5981 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 5982 { 5983 PetscErrorCode ierr; 5984 PetscBool cisdense; 5985 5986 PetscFunctionBegin; 5987 if (A->cmap->n != 
B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5988 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
5989 ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
5990 ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
5991 if (!cisdense) {
5992 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
5993 }
5994 ierr = MatSetUp(C);CHKERRQ(ierr);
5995
5996 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5997 PetscFunctionReturn(0);
5998 }
5999
6000 /* ----------------------------------------------------------------*/
6001 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6002 {
6003 Mat_Product *product = C->product;
6004 Mat A = product->A,B=product->B;
6005
6006 PetscFunctionBegin;
6007 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
6008 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
6009
6010 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6011 C->ops->productsymbolic = MatProductSymbolic_AB;
6012 PetscFunctionReturn(0);
6013 }
6014
6015 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6016 {
6017 PetscErrorCode ierr;
6018 Mat_Product *product = C->product;
6019
6020 PetscFunctionBegin;
6021 if (product->type == MATPRODUCT_AB) {
6022 ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
6023 }
6024 PetscFunctionReturn(0);
6025 }
6026 /* ----------------------------------------------------------------*/
6027
6028 /*MC
6029 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6030
6031 Options Database Keys:
6032 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6033
6034 Level: beginner
6035
6036 Notes:
6037 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
6038 in this case the values associated with the rows and columns one passes in are set to zero
6039 in the matrix
6040
6041 MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type.
In this no 6042 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 6043 6044 .seealso: MatCreateAIJ() 6045 M*/ 6046 6047 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6048 { 6049 Mat_MPIAIJ *b; 6050 PetscErrorCode ierr; 6051 PetscMPIInt size; 6052 6053 PetscFunctionBegin; 6054 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 6055 6056 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 6057 B->data = (void*)b; 6058 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 6059 B->assembled = PETSC_FALSE; 6060 B->insertmode = NOT_SET_VALUES; 6061 b->size = size; 6062 6063 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr); 6064 6065 /* build cache for off array entries formed */ 6066 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 6067 6068 b->donotstash = PETSC_FALSE; 6069 b->colmap = NULL; 6070 b->garray = NULL; 6071 b->roworiented = PETSC_TRUE; 6072 6073 /* stuff used for matrix vector multiply */ 6074 b->lvec = NULL; 6075 b->Mvctx = NULL; 6076 6077 /* stuff for MatGetRow() */ 6078 b->rowindices = NULL; 6079 b->rowvalues = NULL; 6080 b->getrowactive = PETSC_FALSE; 6081 6082 /* flexible pointer used in CUSPARSE classes */ 6083 b->spptr = NULL; 6084 6085 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 6086 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 6087 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 6088 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 6089 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 6090 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 6091 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 6092 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 6093 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 6094 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr); 6095 #if defined(PETSC_HAVE_CUDA) 6096 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr); 6097 #endif 6098 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6099 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr); 6100 #endif 6101 #if defined(PETSC_HAVE_MKL_SPARSE) 6102 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 6103 #endif 6104 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 6105 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr); 6106 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 6107 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);CHKERRQ(ierr); 6108 #if defined(PETSC_HAVE_ELEMENTAL) 6109 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 6110 #endif 6111 #if defined(PETSC_HAVE_SCALAPACK) 6112 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr); 6113 #endif 6114 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 6115 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 6116 #if defined(PETSC_HAVE_HYPRE) 6117 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 6118 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr); 6119 #endif 6120 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr); 6121 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr); 6122 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 6123 PetscFunctionReturn(0); 6124 } 6125 6126 /*@C 6127 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 6128 and "off-diagonal" part of the matrix in CSR format. 6129 6130 Collective 6131 6132 Input Parameters: 6133 + comm - MPI communicator 6134 . m - number of local rows (Cannot be PETSC_DECIDE) 6135 . n - This value should be the same as the local size used in creating the 6136 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 6137 calculated if N is given) For square matrices n is almost always m. 6138 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 6139 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 6140 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6141 . j - column indices 6142 . a - matrix values 6143 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6144 . oj - column indices 6145 - oa - matrix values 6146 6147 Output Parameter: 6148 . mat - the matrix 6149 6150 Level: advanced 6151 6152 Notes: 6153 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6154 must free the arrays once the matrix has been destroyed and not before. 6155 6156 The i and j indices are 0 based 6157 6158 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6159 6160 This sets local rows and cannot be used to set off-processor values. 6161 6162 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6163 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6164 not easily support in-place reassembly. 
It is recommended to use MatSetValues() (or a variant thereof) because 6165 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6166 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6167 communication if it is known that only local entries will be set. 6168 6169 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 6170 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 6171 @*/ 6172 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 6173 { 6174 PetscErrorCode ierr; 6175 Mat_MPIAIJ *maij; 6176 6177 PetscFunctionBegin; 6178 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6179 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 6180 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 6181 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 6182 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 6183 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 6184 maij = (Mat_MPIAIJ*) (*mat)->data; 6185 6186 (*mat)->preallocated = PETSC_TRUE; 6187 6188 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 6189 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 6190 6191 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 6192 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 6193 6194 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6195 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6196 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6197 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6198 6199 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 6200 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6201 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6202 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 6203 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6204 PetscFunctionReturn(0); 6205 } 6206 6207 /* 6208 Special version for direct calls from Fortran 6209 */ 6210 #include <petsc/private/fortranimpl.h> 6211 6212 /* Change these macros so can be used in void function */ 6213 #undef CHKERRQ 6214 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 6215 #undef SETERRQ2 6216 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 6217 #undef SETERRQ3 6218 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 6219 #undef SETERRQ 6220 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 6221 6222 #if defined(PETSC_HAVE_FORTRAN_CAPS) 6223 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 6224 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 6225 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 6226 #else 6227 #endif 6228 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 6229 { 6230 Mat mat = *mmat; 6231 PetscInt m = *mm, n = *mn; 6232 InsertMode addv = *maddv; 6233 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 6234 PetscScalar value; 6235 
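/* Note: the body below duplicates the logic of MatSetValues_MPIAIJ() so that it can be called directly from Fortran as a void routine; the two must be kept in sync when either changes */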
PetscErrorCode ierr; 6236 6237 MatCheckPreallocated(mat,1); 6238 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 6239 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 6240 { 6241 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 6242 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 6243 PetscBool roworiented = aij->roworiented; 6244 6245 /* Some Variables required in the macro */ 6246 Mat A = aij->A; 6247 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 6248 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 6249 MatScalar *aa = a->a; 6250 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 6251 Mat B = aij->B; 6252 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 6253 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 6254 MatScalar *ba = b->a; 6255 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 6256 * cannot use "#if defined" inside a macro. */ 6257 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 6258 6259 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 6260 PetscInt nonew = a->nonew; 6261 MatScalar *ap1,*ap2; 6262 6263 PetscFunctionBegin; 6264 for (i=0; i<m; i++) { 6265 if (im[i] < 0) continue; 6266 if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 6267 if (im[i] >= rstart && im[i] < rend) { 6268 row = im[i] - rstart; 6269 lastcol1 = -1; 6270 rp1 = aj + ai[row]; 6271 ap1 = aa + ai[row]; 6272 rmax1 = aimax[row]; 6273 nrow1 = ailen[row]; 6274 low1 = 0; 6275 high1 = nrow1; 6276 lastcol2 = -1; 6277 rp2 = bj + bi[row]; 6278 ap2 = ba + bi[row]; 6279 rmax2 = bimax[row]; 6280 nrow2 = bilen[row]; 6281 low2 = 0; 6282 high2 = nrow2; 6283 6284 for (j=0; j<n; j++) { 6285 if (roworiented) value = v[i*n+j]; 6286 else value = v[i+j*m]; 6287 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 6288 if (in[j] >= cstart && in[j] < cend) { 6289 col = in[j] - cstart; 6290 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 6291 #if defined(PETSC_HAVE_DEVICE) 6292 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 6293 #endif 6294 } else if (in[j] < 0) continue; 6295 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 6296 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */ 6297 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 6298 } else { 6299 if (mat->was_assembled) { 6300 if (!aij->colmap) { 6301 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 6302 } 6303 #if defined(PETSC_USE_CTABLE) 6304 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 6305 col--; 6306 #else 6307 col = aij->colmap[in[j]] - 1; 6308 #endif 6309 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 6310 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 6311 col = in[j]; 6312 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 6313 B = aij->B; 6314 b = (Mat_SeqAIJ*)B->data; 6315 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 6316 rp2 = bj + bi[row]; 6317 ap2 = ba + bi[row]; 6318 rmax2 = bimax[row]; 6319 nrow2 = bilen[row]; 6320 low2 = 
0; 6321 high2 = nrow2; 6322 bm = aij->B->rmap->n; 6323 ba = b->a; 6324 inserted = PETSC_FALSE; 6325 } 6326 } else col = in[j]; 6327 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 6328 #if defined(PETSC_HAVE_DEVICE) 6329 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 6330 #endif 6331 } 6332 } 6333 } else if (!aij->donotstash) { 6334 if (roworiented) { 6335 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6336 } else { 6337 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6338 } 6339 } 6340 } 6341 } 6342 PetscFunctionReturnVoid(); 6343 } 6344 6345 typedef struct { 6346 Mat *mp; /* intermediate products */ 6347 PetscBool *mptmp; /* is the intermediate product temporary ? */ 6348 PetscInt cp; /* number of intermediate products */ 6349 6350 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6351 PetscInt *startsj_s,*startsj_r; 6352 PetscScalar *bufa; 6353 Mat P_oth; 6354 6355 /* may take advantage of merging product->B */ 6356 Mat Bloc; 6357 6358 /* cusparse does not have support to split between symbolic and numeric phases 6359 When api_user is true, we don't need to update the numerical values 6360 of the temporary storage */ 6361 PetscBool reusesym; 6362 6363 /* support for COO values insertion */ 6364 PetscScalar *coo_v,*coo_w; 6365 PetscInt **own; 6366 PetscInt **off; 6367 PetscBool hasoffproc; /* if true, non-local values insertion (i.e. AtB or PtAP) */ 6368 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6369 PetscMemType mtype; 6370 6371 /* customization */ 6372 PetscBool abmerge; 6373 PetscBool P_oth_bind; 6374 } MatMatMPIAIJBACKEND; 6375 6376 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6377 { 6378 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data; 6379 PetscInt i; 6380 PetscErrorCode ierr; 6381 6382 PetscFunctionBegin; 6383 ierr = PetscFree2(mmdata->startsj_s,mmdata->startsj_r);CHKERRQ(ierr); 6384 ierr = PetscFree(mmdata->bufa);CHKERRQ(ierr); 6385 ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v);CHKERRQ(ierr); 6386 ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w);CHKERRQ(ierr); 6387 ierr = MatDestroy(&mmdata->P_oth);CHKERRQ(ierr); 6388 ierr = MatDestroy(&mmdata->Bloc);CHKERRQ(ierr); 6389 ierr = PetscSFDestroy(&mmdata->sf);CHKERRQ(ierr); 6390 for (i = 0; i < mmdata->cp; i++) { 6391 ierr = MatDestroy(&mmdata->mp[i]);CHKERRQ(ierr); 6392 } 6393 ierr = PetscFree(mmdata->mp);CHKERRQ(ierr); 6394 ierr = PetscFree(mmdata->mptmp);CHKERRQ(ierr); 6395 ierr = PetscFree(mmdata->own[0]);CHKERRQ(ierr); 6396 ierr = PetscFree(mmdata->own);CHKERRQ(ierr); 6397 ierr = PetscFree(mmdata->off[0]);CHKERRQ(ierr); 6398 ierr = PetscFree(mmdata->off);CHKERRQ(ierr); 6399 ierr = PetscFree(mmdata);CHKERRQ(ierr); 6400 PetscFunctionReturn(0); 6401 } 6402 6403 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 6404 { 6405 PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]); 6406 PetscErrorCode ierr; 6407 6408 PetscFunctionBegin; 6409 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f);CHKERRQ(ierr); 6410 if (f) { 6411 ierr = (*f)(A,n,idx,v);CHKERRQ(ierr); 6412 } else { 6413 const PetscScalar *vv; 6414 6415 ierr = MatSeqAIJGetArrayRead(A,&vv);CHKERRQ(ierr); 6416 if (n && idx) { 6417 PetscScalar *w = v; 6418 const PetscInt *oi = idx; 
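/* no type-specific MatSeqAIJCopySubArray_C implementation was found, so gather the requested entries one at a time from the host array */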
6419 PetscInt j; 6420 6421 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 6422 } else { 6423 ierr = PetscArraycpy(v,vv,n);CHKERRQ(ierr); 6424 } 6425 ierr = MatSeqAIJRestoreArrayRead(A,&vv);CHKERRQ(ierr); 6426 } 6427 PetscFunctionReturn(0); 6428 } 6429 6430 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 6431 { 6432 MatMatMPIAIJBACKEND *mmdata; 6433 PetscInt i,n_d,n_o; 6434 PetscErrorCode ierr; 6435 6436 PetscFunctionBegin; 6437 MatCheckProduct(C,1); 6438 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 6439 mmdata = (MatMatMPIAIJBACKEND*)C->product->data; 6440 if (!mmdata->reusesym) { /* update temporary matrices */ 6441 if (mmdata->P_oth) { 6442 ierr = MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 6443 } 6444 if (mmdata->Bloc) { 6445 ierr = MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc);CHKERRQ(ierr); 6446 } 6447 } 6448 mmdata->reusesym = PETSC_FALSE; 6449 6450 for (i = 0; i < mmdata->cp; i++) { 6451 if (!mmdata->mp[i]->ops->productnumeric) SETERRQ1(PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]); 6452 ierr = (*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]);CHKERRQ(ierr); 6453 } 6454 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 6455 PetscInt noff = mmdata->off[i+1] - mmdata->off[i]; 6456 6457 if (mmdata->mptmp[i]) continue; 6458 if (noff) { 6459 PetscInt nown = mmdata->own[i+1] - mmdata->own[i]; 6460 6461 ierr = MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o);CHKERRQ(ierr); 6462 ierr = MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d);CHKERRQ(ierr); 6463 n_o += noff; 6464 n_d += nown; 6465 } else { 6466 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data; 6467 6468 ierr = MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d);CHKERRQ(ierr); 6469 n_d += mm->nz; 6470 } 6471 } 6472 if (mmdata->hasoffproc) { /* offprocess insertion */ 6473 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr); 6474 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr); 6475 } 6476 ierr = MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES);CHKERRQ(ierr); 6477 PetscFunctionReturn(0); 6478 } 6479 6480 /* Support for Pt * A, A * P, or Pt * A * P */ 6481 #define MAX_NUMBER_INTERMEDIATE 4 6482 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 6483 { 6484 Mat_Product *product = C->product; 6485 Mat A,P,mp[MAX_NUMBER_INTERMEDIATE]; 6486 Mat_MPIAIJ *a,*p; 6487 MatMatMPIAIJBACKEND *mmdata; 6488 ISLocalToGlobalMapping P_oth_l2g = NULL; 6489 IS glob = NULL; 6490 const char *prefix; 6491 char pprefix[256]; 6492 const PetscInt *globidx,*P_oth_idx; 6493 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; 6494 PetscInt cp = 0,m,n,M,N,ncoo,ncoo_d,ncoo_o,ncoo_oown,*coo_i,*coo_j,cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE],i,j; 6495 MatProductType ptype; 6496 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk; 6497 PetscMPIInt size; 6498 PetscErrorCode ierr; 6499 6500 PetscFunctionBegin; 6501 MatCheckProduct(C,1); 6502 if (product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 6503 ptype = product->type; 6504 if (product->A->symmetric && ptype == MATPRODUCT_AtB) ptype = 
MATPRODUCT_AB; 6505 switch (ptype) { 6506 case MATPRODUCT_AB: 6507 A = product->A; 6508 P = product->B; 6509 m = A->rmap->n; 6510 n = P->cmap->n; 6511 M = A->rmap->N; 6512 N = P->cmap->N; 6513 break; 6514 case MATPRODUCT_AtB: 6515 P = product->A; 6516 A = product->B; 6517 m = P->cmap->n; 6518 n = A->cmap->n; 6519 M = P->cmap->N; 6520 N = A->cmap->N; 6521 hasoffproc = PETSC_TRUE; 6522 break; 6523 case MATPRODUCT_PtAP: 6524 A = product->A; 6525 P = product->B; 6526 m = P->cmap->n; 6527 n = P->cmap->n; 6528 M = P->cmap->N; 6529 N = P->cmap->N; 6530 hasoffproc = PETSC_TRUE; 6531 break; 6532 default: 6533 SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 6534 } 6535 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)C),&size);CHKERRMPI(ierr); 6536 if (size == 1) hasoffproc = PETSC_FALSE; 6537 6538 /* defaults */ 6539 for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) { 6540 mp[i] = NULL; 6541 mptmp[i] = PETSC_FALSE; 6542 rmapt[i] = -1; 6543 cmapt[i] = -1; 6544 rmapa[i] = NULL; 6545 cmapa[i] = NULL; 6546 } 6547 6548 /* customization */ 6549 ierr = PetscNew(&mmdata);CHKERRQ(ierr); 6550 mmdata->reusesym = product->api_user; 6551 if (ptype == MATPRODUCT_AB) { 6552 if (product->api_user) { 6553 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr); 6554 ierr = PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr); 6555 ierr = PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6556 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6557 } else { 6558 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr); 6559 ierr = PetscOptionsBool("-matproduct_ab_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr); 6560 ierr = PetscOptionsBool("-matproduct_ab_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6561 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6562 } 6563 } else if (ptype == MATPRODUCT_PtAP) { 6564 if (product->api_user) { 6565 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr); 6566 ierr = PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6567 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6568 } else { 6569 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr); 6570 ierr = PetscOptionsBool("-matproduct_ptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6571 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6572 } 6573 } 6574 a = (Mat_MPIAIJ*)A->data; 6575 p = (Mat_MPIAIJ*)P->data; 6576 ierr = MatSetSizes(C,m,n,M,N);CHKERRQ(ierr); 6577 ierr = PetscLayoutSetUp(C->rmap);CHKERRQ(ierr); 6578 ierr = PetscLayoutSetUp(C->cmap);CHKERRQ(ierr); 6579 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 6580 ierr = MatGetOptionsPrefix(C,&prefix);CHKERRQ(ierr); 6581 switch (ptype) { 6582 case MATPRODUCT_AB: /* A * P */ 6583 ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 6584 6585 if 
(mmdata->abmerge) { /* A_diag * P_loc and A_off * P_oth */ 6586 /* P is product->B */ 6587 ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 6588 ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6589 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6590 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6591 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6592 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6593 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6594 mp[cp]->product->api_user = product->api_user; 6595 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6596 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6597 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6598 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6599 rmapt[cp] = 1; 6600 cmapt[cp] = 2; 6601 cmapa[cp] = globidx; 6602 mptmp[cp] = PETSC_FALSE; 6603 cp++; 6604 } else { /* A_diag * P_diag and A_diag * P_off and A_off * P_oth */ 6605 ierr = MatProductCreate(a->A,p->A,NULL,&mp[cp]);CHKERRQ(ierr); 6606 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6607 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6608 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6609 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6610 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6611 mp[cp]->product->api_user = product->api_user; 6612 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6613 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6614 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6615 rmapt[cp] = 1; 6616 cmapt[cp] = 1; 6617 mptmp[cp] = PETSC_FALSE; 6618 cp++; 6619 ierr = MatProductCreate(a->A,p->B,NULL,&mp[cp]);CHKERRQ(ierr); 6620 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6621 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6622 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6623 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6624 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6625 mp[cp]->product->api_user = product->api_user; 6626 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6627 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6628 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6629 rmapt[cp] = 1; 6630 cmapt[cp] = 2; 6631 cmapa[cp] = p->garray; 6632 mptmp[cp] = PETSC_FALSE; 6633 cp++; 6634 } 6635 if (mmdata->P_oth) { 6636 ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); 6637 ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 6638 ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr); 6639 ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr); 6640 ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr); 6641 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6642 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6643 ierr = 
PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6644 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6645 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6646 mp[cp]->product->api_user = product->api_user; 6647 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6648 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6649 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6650 rmapt[cp] = 1; 6651 cmapt[cp] = 2; 6652 cmapa[cp] = P_oth_idx; 6653 mptmp[cp] = PETSC_FALSE; 6654 cp++; 6655 } 6656 break; 6657 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 6658 /* A is product->B */ 6659 ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 6660 if (A == P) { 6661 ierr = MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6662 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6663 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6664 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6665 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6666 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6667 mp[cp]->product->api_user = product->api_user; 6668 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6669 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6670 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6671 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6672 rmapt[cp] = 2; 6673 rmapa[cp] = globidx; 6674 cmapt[cp] = 2; 6675 cmapa[cp] = globidx; 6676 mptmp[cp] = PETSC_FALSE; 6677 cp++; 6678 } else { 6679 ierr = MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6680 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6681 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6682 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6683 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6684 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6685 mp[cp]->product->api_user = product->api_user; 6686 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6687 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6688 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6689 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6690 rmapt[cp] = 1; 6691 cmapt[cp] = 2; 6692 cmapa[cp] = globidx; 6693 mptmp[cp] = PETSC_FALSE; 6694 cp++; 6695 ierr = MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6696 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6697 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6698 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6699 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6700 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6701 mp[cp]->product->api_user = product->api_user; 6702 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6703 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6704 ierr = 
(*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6705 rmapt[cp] = 2; 6706 rmapa[cp] = p->garray; 6707 cmapt[cp] = 2; 6708 cmapa[cp] = globidx; 6709 mptmp[cp] = PETSC_FALSE; 6710 cp++; 6711 } 6712 break; 6713 case MATPRODUCT_PtAP: 6714 ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 6715 /* P is product->B */ 6716 ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 6717 ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6718 ierr = MatProductSetType(mp[cp],MATPRODUCT_PtAP);CHKERRQ(ierr); 6719 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6720 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6721 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6722 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6723 mp[cp]->product->api_user = product->api_user; 6724 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6725 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6726 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6727 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6728 rmapt[cp] = 2; 6729 rmapa[cp] = globidx; 6730 cmapt[cp] = 2; 6731 cmapa[cp] = globidx; 6732 mptmp[cp] = PETSC_FALSE; 6733 cp++; 6734 if (mmdata->P_oth) { 6735 ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); 6736 ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 6737 ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr); 6738 ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr); 6739 ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr); 6740 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6741 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6742 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6743 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6744 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6745 mp[cp]->product->api_user = product->api_user; 6746 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6747 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6748 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6749 mptmp[cp] = PETSC_TRUE; 6750 cp++; 6751 ierr = MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]);CHKERRQ(ierr); 6752 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6753 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6754 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6755 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6756 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6757 mp[cp]->product->api_user = product->api_user; 6758 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6759 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6760 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6761 rmapt[cp] = 2; 6762 rmapa[cp] = globidx; 6763 cmapt[cp] = 2; 6764 cmapa[cp] = P_oth_idx; 6765 mptmp[cp] = PETSC_FALSE; 6766 cp++; 6767 } 
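/* at this point the PtAP has been decomposed into mp[0] = Bloc^T*Ad*Bloc (Bloc holding the local rows of P, Ad = a->A) and, when P_oth is nonempty, the temporary mp[1] = Ao*P_oth (Ao = a->B) and mp[2] = Bloc^T*mp[1]; their entries are merged into C by the COO assembly set up below */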
6768 break; 6769 default: 6770 SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 6771 } 6772 /* sanity check */ 6773 if (size > 1) for (i = 0; i < cp; i++) if (rmapt[i] == 2 && !hasoffproc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %D",i); 6774 6775 ierr = PetscMalloc1(cp,&mmdata->mp);CHKERRQ(ierr); 6776 for (i = 0; i < cp; i++) mmdata->mp[i] = mp[i]; 6777 ierr = PetscMalloc1(cp,&mmdata->mptmp);CHKERRQ(ierr); 6778 for (i = 0; i < cp; i++) mmdata->mptmp[i] = mptmp[i]; 6779 mmdata->cp = cp; 6780 C->product->data = mmdata; 6781 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 6782 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 6783 6784 /* memory type */ 6785 mmdata->mtype = PETSC_MEMTYPE_HOST; 6786 ierr = PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");CHKERRQ(ierr); 6787 ierr = PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"");CHKERRQ(ierr); 6788 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 6789 // enable the line below MatSeqAIJCopySubArray_SeqAIJKokkos is implemented 6790 //else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_DEVICE; 6791 6792 /* prepare coo coordinates for values insertion */ 6793 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 6794 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 6795 if (mptmp[cp]) continue; 6796 if (rmapt[cp] == 2 && hasoffproc) { 6797 const PetscInt *rmap = rmapa[cp]; 6798 const PetscInt mr = mp[cp]->rmap->n; 6799 const PetscInt rs = C->rmap->rstart; 6800 const PetscInt re = C->rmap->rend; 6801 const PetscInt *ii = mm->i; 6802 for (i = 0; i < mr; i++) { 6803 const PetscInt gr = rmap[i]; 6804 const PetscInt nz = ii[i+1] - ii[i]; 6805 if (gr < rs || gr >= re) ncoo_o += nz; 6806 else ncoo_oown += nz; 6807 } 6808 } else ncoo_d += mm->nz; 6809 } 6810 ierr = PetscCalloc1(mmdata->cp+1,&mmdata->off);CHKERRQ(ierr); 6811 ierr = PetscCalloc1(mmdata->cp+1,&mmdata->own);CHKERRQ(ierr); 6812 if (hasoffproc) { /* handle offproc values insertion */ 6813 PetscSF msf; 6814 PetscInt ncoo2,*coo_i2,*coo_j2; 6815 6816 ierr = PetscMalloc1(ncoo_o,&mmdata->off[0]);CHKERRQ(ierr); 6817 ierr = PetscMalloc1(ncoo_oown,&mmdata->own[0]);CHKERRQ(ierr); 6818 ierr = PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j);CHKERRQ(ierr); 6819 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 6820 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 6821 PetscInt *idxoff = mmdata->off[cp]; 6822 PetscInt *idxown = mmdata->own[cp]; 6823 if (!mptmp[cp] && rmapt[cp] == 2) { 6824 const PetscInt *rmap = rmapa[cp]; 6825 const PetscInt *cmap = cmapa[cp]; 6826 const PetscInt *ii = mm->i; 6827 PetscInt *coi = coo_i + ncoo_o; 6828 PetscInt *coj = coo_j + ncoo_o; 6829 const PetscInt mr = mp[cp]->rmap->n; 6830 const PetscInt rs = C->rmap->rstart; 6831 const PetscInt re = C->rmap->rend; 6832 const PetscInt cs = C->cmap->rstart; 6833 for (i = 0; i < mr; i++) { 6834 const PetscInt *jj = mm->j + ii[i]; 6835 const PetscInt gr = rmap[i]; 6836 const PetscInt nz = ii[i+1] - ii[i]; 6837 if (gr < rs || gr >= re) { 6838 for (j = ii[i]; j < ii[i+1]; j++) { 6839 *coi++ = gr; 6840 *idxoff++ = j; 6841 } 6842 if (!cmapt[cp]) { /* already global */ 6843 for (j = 0; j < nz; j++) *coj++ = jj[j]; 6844 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 6845 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 6846 } else { /* offdiag */ 6847 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 6848 } 6849 ncoo_o += nz; 6850 } 
else { 6851 for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j; 6852 } 6853 } 6854 } 6855 mmdata->off[cp + 1] = idxoff; 6856 mmdata->own[cp + 1] = idxown; 6857 } 6858 6859 ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr); 6860 ierr = PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o,NULL,PETSC_OWN_POINTER,coo_i);CHKERRQ(ierr); 6861 ierr = PetscSFGetMultiSF(mmdata->sf,&msf);CHKERRQ(ierr); 6862 ierr = PetscSFGetGraph(msf,&ncoo2,NULL,NULL,NULL);CHKERRQ(ierr); 6863 ncoo = ncoo_d + ncoo_oown + ncoo2; 6864 ierr = PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2);CHKERRQ(ierr); 6865 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 6866 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 6867 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 6868 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 6869 ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr); 6870 ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w);CHKERRQ(ierr); 6871 coo_i = coo_i2; 6872 coo_j = coo_j2; 6873 } else { /* no offproc values insertion */ 6874 ncoo = ncoo_d; 6875 ierr = PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j);CHKERRQ(ierr); 6876 6877 ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr); 6878 ierr = PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER);CHKERRQ(ierr); 6879 ierr = PetscSFSetUp(mmdata->sf);CHKERRQ(ierr); 6880 } 6881 mmdata->hasoffproc = hasoffproc; 6882 6883 /* on-process indices */ 6884 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 6885 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 6886 PetscInt *coi = coo_i + ncoo_d; 6887 PetscInt *coj = coo_j + ncoo_d; 6888 const PetscInt *jj = mm->j; 6889 const PetscInt *ii = mm->i; 6890 const PetscInt *cmap = cmapa[cp]; 6891 const PetscInt *rmap = rmapa[cp]; 6892 const PetscInt mr = mp[cp]->rmap->n; 6893 const PetscInt rs = C->rmap->rstart; 6894 const PetscInt re = C->rmap->rend; 6895 const PetscInt cs = C->cmap->rstart; 6896 6897 if (mptmp[cp]) continue; 6898 if (rmapt[cp] == 1) { 6899 for (i = 0; i < mr; i++) { 6900 const PetscInt gr = i + rs; 6901 for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr; 6902 } 6903 /* columns coo */ 6904 if (!cmapt[cp]) { 6905 ierr = PetscArraycpy(coj,jj,mm->nz);CHKERRQ(ierr); 6906 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 6907 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; 6908 } else { /* offdiag */ 6909 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 6910 } 6911 ncoo_d += mm->nz; 6912 } else if (rmapt[cp] == 2) { 6913 for (i = 0; i < mr; i++) { 6914 const PetscInt *jj = mm->j + ii[i]; 6915 const PetscInt gr = rmap[i]; 6916 const PetscInt nz = ii[i+1] - ii[i]; 6917 if (gr >= rs && gr < re) { 6918 for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr; 6919 if (!cmapt[cp]) { /* already global */ 6920 for (j = 0; j < nz; j++) *coj++ = jj[j]; 6921 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 6922 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 6923 } else { /* offdiag */ 6924 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 6925 } 6926 ncoo_d += nz; 6927 } 6928 } 6929 } 6930 } 6931 if (glob) { 6932 ierr = ISRestoreIndices(glob,&globidx);CHKERRQ(ierr); 6933 } 6934 ierr = ISDestroy(&glob);CHKERRQ(ierr); 6935 if (P_oth_l2g) { 6936 ierr = ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 6937 } 
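/* coo_i/coo_j now hold the global (row,col) pattern contributed by every kept intermediate product; it remains to release the index mappings, allocate the matching value buffer coo_v, and preallocate C with the COO pattern */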
6938 ierr = ISLocalToGlobalMappingDestroy(&P_oth_l2g);CHKERRQ(ierr); 6939 ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v);CHKERRQ(ierr); 6940 6941 /* preallocate with COO data */ 6942 ierr = MatSetPreallocationCOO(C,ncoo,coo_i,coo_j);CHKERRQ(ierr); 6943 ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr); 6944 PetscFunctionReturn(0); 6945 } 6946 6947 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 6948 { 6949 Mat_Product *product = mat->product; 6950 PetscErrorCode ierr; 6951 #if defined(PETSC_HAVE_DEVICE) 6952 PetscBool match = PETSC_FALSE; 6953 PetscBool usecpu = PETSC_FALSE; 6954 #else 6955 PetscBool match = PETSC_TRUE; 6956 #endif 6957 6958 PetscFunctionBegin; 6959 MatCheckProduct(mat,1); 6960 #if defined(PETSC_HAVE_DEVICE) 6961 if (!product->A->boundtocpu && !product->B->boundtocpu) { 6962 ierr = PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match);CHKERRQ(ierr); 6963 } 6964 if (match) { /* we can always fallback to CPU in case an operation is not performing on the device */ 6965 switch (product->type) { 6966 case MATPRODUCT_AB: 6967 if (product->api_user) { 6968 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");CHKERRQ(ierr); 6969 ierr = PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 6970 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6971 } else { 6972 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr); 6973 ierr = PetscOptionsBool("-matproduct_ab_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 6974 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6975 } 6976 break; 6977 case MATPRODUCT_AtB: 6978 if (product->api_user) { 6979 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr); 6980 ierr = PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 6981 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6982 } else { 6983 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr); 6984 ierr = PetscOptionsBool("-matproduct_atb_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 6985 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6986 } 6987 break; 6988 case MATPRODUCT_PtAP: 6989 if (product->api_user) { 6990 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");CHKERRQ(ierr); 6991 ierr = PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 6992 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6993 } else { 6994 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr); 6995 ierr = PetscOptionsBool("-matproduct_ptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr); 6996 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6997 } 6998 break; 6999 default: 7000 break; 7001 } 7002 match = (PetscBool)!usecpu; 7003 } 7004 #endif 7005 if (match) { 7006 switch (product->type) { 7007 case MATPRODUCT_AB: 7008 case MATPRODUCT_AtB: 7009 case MATPRODUCT_PtAP: 7010 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7011 break; 7012 default: 7013 break; 7014 } 7015 } 7016 /* fallback to MPIAIJ ops */ 7017 if 
(!mat->ops->productsymbolic) { 7018 ierr = MatProductSetFromOptions_MPIAIJ(mat);CHKERRQ(ierr); 7019 } 7020 PetscFunctionReturn(0); 7021 } 7022
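/*
   Usage sketch (not part of the library source; the matrices A and P are assumed to be parallel AIJ
   matrices already assembled by the caller): the backend path above is reached through the ordinary
   MatProduct API when the matrices have a device type such as MATMPIAIJCUSPARSE, since
   MatProductSetFromOptions_MPIAIJBACKEND() then installs MatProductSymbolic_MPIAIJBACKEND() and the
   symbolic phase in turn installs MatProductNumeric_MPIAIJBACKEND().

     Mat            C;
     PetscErrorCode ierr;
     ierr = MatProductCreate(A,P,NULL,&C);CHKERRQ(ierr);
     ierr = MatProductSetType(C,MATPRODUCT_PtAP);CHKERRQ(ierr);
     ierr = MatProductSetFromOptions(C);CHKERRQ(ierr);
     ierr = MatProductSymbolic(C);CHKERRQ(ierr);
     ierr = MatProductNumeric(C);CHKERRQ(ierr);

   The -matptap_backend_cpu (or -matproduct_ptap_backend_cpu) option read in
   MatProductSetFromOptions_MPIAIJBACKEND() falls back to the plain MPIAIJ implementation instead.
   The api_user entry point MatPtAP(A,P,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C) follows the same route.
*/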