1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 /*MC 10 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 11 12 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 13 and MATMPIAIJ otherwise. As a result, for single process communicators, 14 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation() is supported 15 for communicators controlling multiple processes. It is recommended that you call both of 16 the above preallocation routines for simplicity. 17 18 Options Database Keys: 19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 20 21 Developer Notes: 22 Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 23 enough exist. 24 25 Level: beginner 26 27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 28 M*/ 29 30 /*MC 31 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 32 33 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 34 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 35 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 36 for communicators controlling multiple processes. It is recommended that you call both of 37 the above preallocation routines for simplicity. 38 39 Options Database Keys: 40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 41 42 Level: beginner 43 44 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 45 M*/ 46 47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg) 48 { 49 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 50 PetscErrorCode ierr; 51 52 PetscFunctionBegin; 53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL) 54 A->boundtocpu = flg; 55 #endif 56 if (a->A) { 57 ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr); 58 } 59 if (a->B) { 60 ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr); 61 } 62 PetscFunctionReturn(0); 63 } 64 65 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 66 { 67 PetscErrorCode ierr; 68 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 69 70 PetscFunctionBegin; 71 if (mat->A) { 72 ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr); 73 ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr); 74 } 75 PetscFunctionReturn(0); 76 } 77 78 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 79 { 80 PetscErrorCode ierr; 81 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 82 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 83 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 84 const PetscInt *ia,*ib; 85 const MatScalar *aa,*bb,*aav,*bav; 86 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 87 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 88 89 PetscFunctionBegin; 90 *keptrows = NULL; 91 92 ia = a->i; 93 ib = b->i; 94 ierr = MatSeqAIJGetArrayRead(mat->A,&aav);CHKERRQ(ierr); 95 ierr = MatSeqAIJGetArrayRead(mat->B,&bav);CHKERRQ(ierr); 96 for (i=0; i<m; i++) { 97 na = ia[i+1] - ia[i]; 98 nb = ib[i+1] - ib[i]; 99 if (!na && !nb) { 100 cnt++; 101 goto ok1; 102 } 103 aa = aav + ia[i]; 104 for (j=0; j<na; j++) { 105 if (aa[j] != 0.0) goto ok1; 106 } 107 bb = bav + ib[i]; 108 for (j=0; j 
<nb; j++) { 109 if (bb[j] != 0.0) goto ok1; 110 } 111 cnt++; 112 ok1:; 113 } 114 ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRMPI(ierr); 115 if (!n0rows) { 116 ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr); 117 ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 121 cnt = 0; 122 for (i=0; i<m; i++) { 123 na = ia[i+1] - ia[i]; 124 nb = ib[i+1] - ib[i]; 125 if (!na && !nb) continue; 126 aa = aav + ia[i]; 127 for (j=0; j<na;j++) { 128 if (aa[j] != 0.0) { 129 rows[cnt++] = rstart + i; 130 goto ok2; 131 } 132 } 133 bb = bav + ib[i]; 134 for (j=0; j<nb; j++) { 135 if (bb[j] != 0.0) { 136 rows[cnt++] = rstart + i; 137 goto ok2; 138 } 139 } 140 ok2:; 141 } 142 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 143 ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr); 144 ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr); 145 PetscFunctionReturn(0); 146 } 147 148 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 149 { 150 PetscErrorCode ierr; 151 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 152 PetscBool cong; 153 154 PetscFunctionBegin; 155 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 156 if (Y->assembled && cong) { 157 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 158 } else { 159 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 160 } 161 PetscFunctionReturn(0); 162 } 163 164 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 165 { 166 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 167 PetscErrorCode ierr; 168 PetscInt i,rstart,nrows,*rows; 169 170 PetscFunctionBegin; 171 *zrows = NULL; 172 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 173 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 174 for (i=0; i<nrows; i++) rows[i] += rstart; 175 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 176 PetscFunctionReturn(0); 177 } 178 179 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 180 { 181 PetscErrorCode ierr; 182 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 183 PetscInt i,n,*garray = aij->garray; 184 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 185 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 186 PetscReal *work; 187 const PetscScalar *dummy; 188 189 PetscFunctionBegin; 190 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 191 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 192 ierr = MatSeqAIJGetArrayRead(aij->A,&dummy);CHKERRQ(ierr); 193 ierr = MatSeqAIJRestoreArrayRead(aij->A,&dummy);CHKERRQ(ierr); 194 ierr = MatSeqAIJGetArrayRead(aij->B,&dummy);CHKERRQ(ierr); 195 ierr = MatSeqAIJRestoreArrayRead(aij->B,&dummy);CHKERRQ(ierr); 196 if (type == NORM_2) { 197 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 198 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 199 } 200 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 201 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 202 } 203 } else if (type == NORM_1) { 204 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 205 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 206 } 207 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 208 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 209 } 210 } else if (type == NORM_INFINITY) { 211 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 212 work[A->cmap->rstart + a_aij->j[i]] 
= PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 213 } 214 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 215 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 216 } 217 218 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 219 if (type == NORM_INFINITY) { 220 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 221 } else { 222 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 223 } 224 ierr = PetscFree(work);CHKERRQ(ierr); 225 if (type == NORM_2) { 226 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 227 } 228 PetscFunctionReturn(0); 229 } 230 231 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 232 { 233 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 234 IS sis,gis; 235 PetscErrorCode ierr; 236 const PetscInt *isis,*igis; 237 PetscInt n,*iis,nsis,ngis,rstart,i; 238 239 PetscFunctionBegin; 240 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 241 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 242 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 243 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 244 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 245 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 246 247 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 248 ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr); 249 ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr); 250 n = ngis + nsis; 251 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 252 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 253 for (i=0; i<n; i++) iis[i] += rstart; 254 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 255 256 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 257 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 258 ierr = ISDestroy(&sis);CHKERRQ(ierr); 259 ierr = ISDestroy(&gis);CHKERRQ(ierr); 260 PetscFunctionReturn(0); 261 } 262 263 /* 264 Local utility routine that creates a mapping from the global column 265 number to the local number in the off-diagonal part of the local 266 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 267 a slightly higher hash table cost; without it it is not scalable (each processor 268 has an order N integer array but is fast to access. 
269 */ 270 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 271 { 272 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 273 PetscErrorCode ierr; 274 PetscInt n = aij->B->cmap->n,i; 275 276 PetscFunctionBegin; 277 if (n && !aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 278 #if defined(PETSC_USE_CTABLE) 279 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 280 for (i=0; i<n; i++) { 281 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 282 } 283 #else 284 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 285 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 286 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 287 #endif 288 PetscFunctionReturn(0); 289 } 290 291 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 292 { \ 293 if (col <= lastcol1) low1 = 0; \ 294 else high1 = nrow1; \ 295 lastcol1 = col;\ 296 while (high1-low1 > 5) { \ 297 t = (low1+high1)/2; \ 298 if (rp1[t] > col) high1 = t; \ 299 else low1 = t; \ 300 } \ 301 for (_i=low1; _i<high1; _i++) { \ 302 if (rp1[_i] > col) break; \ 303 if (rp1[_i] == col) { \ 304 if (addv == ADD_VALUES) { \ 305 ap1[_i] += value; \ 306 /* Not sure LogFlops will slow dow the code or not */ \ 307 (void)PetscLogFlops(1.0); \ 308 } \ 309 else ap1[_i] = value; \ 310 inserted = PETSC_TRUE; \ 311 goto a_noinsert; \ 312 } \ 313 } \ 314 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 315 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 316 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 317 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 318 N = nrow1++ - 1; a->nz++; high1++; \ 319 /* shift up all the later entries in this row */ \ 320 ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\ 321 ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\ 322 rp1[_i] = col; \ 323 ap1[_i] = value; \ 324 A->nonzerostate++;\ 325 a_noinsert: ; \ 326 ailen[row] = nrow1; \ 327 } 328 329 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 330 { \ 331 if (col <= lastcol2) low2 = 0; \ 332 else high2 = nrow2; \ 333 lastcol2 = col; \ 334 while (high2-low2 > 5) { \ 335 t = (low2+high2)/2; \ 336 if (rp2[t] > col) high2 = t; \ 337 else low2 = t; \ 338 } \ 339 for (_i=low2; _i<high2; _i++) { \ 340 if (rp2[_i] > col) break; \ 341 if (rp2[_i] == col) { \ 342 if (addv == ADD_VALUES) { \ 343 ap2[_i] += value; \ 344 (void)PetscLogFlops(1.0); \ 345 } \ 346 else ap2[_i] = value; \ 347 inserted = PETSC_TRUE; \ 348 goto b_noinsert; \ 349 } \ 350 } \ 351 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 352 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 353 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 354 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 355 N = nrow2++ - 1; b->nz++; high2++; \ 356 /* shift up all the later entries in this row */ \ 357 ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\ 358 ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\ 359 rp2[_i] = col; \ 360 ap2[_i] = value; \ 361 B->nonzerostate++; \ 362 b_noinsert: ; \ 
363 bilen[row] = nrow2; \ 364 } 365 366 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 367 { 368 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 369 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 370 PetscErrorCode ierr; 371 PetscInt l,*garray = mat->garray,diag; 372 373 PetscFunctionBegin; 374 /* code only works for square matrices A */ 375 376 /* find size of row to the left of the diagonal part */ 377 ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr); 378 row = row - diag; 379 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 380 if (garray[b->j[b->i[row]+l]] > diag) break; 381 } 382 ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr); 383 384 /* diagonal part */ 385 ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr); 386 387 /* right of diagonal part */ 388 ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr); 389 #if defined(PETSC_HAVE_DEVICE) 390 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU; 391 #endif 392 PetscFunctionReturn(0); 393 } 394 395 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 396 { 397 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 398 PetscScalar value = 0.0; 399 PetscErrorCode ierr; 400 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 401 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 402 PetscBool roworiented = aij->roworiented; 403 404 /* Some Variables required in the macro */ 405 Mat A = aij->A; 406 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 407 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 408 PetscBool ignorezeroentries = a->ignorezeroentries; 409 Mat B = aij->B; 410 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 411 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 412 MatScalar *aa,*ba; 413 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 414 * cannot use "#if defined" inside a macro. 
*/ 415 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 416 417 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 418 PetscInt nonew; 419 MatScalar *ap1,*ap2; 420 421 PetscFunctionBegin; 422 #if defined(PETSC_HAVE_DEVICE) 423 if (A->offloadmask == PETSC_OFFLOAD_GPU) { 424 const PetscScalar *dummy; 425 ierr = MatSeqAIJGetArrayRead(A,&dummy);CHKERRQ(ierr); 426 ierr = MatSeqAIJRestoreArrayRead(A,&dummy);CHKERRQ(ierr); 427 } 428 if (B->offloadmask == PETSC_OFFLOAD_GPU) { 429 const PetscScalar *dummy; 430 ierr = MatSeqAIJGetArrayRead(B,&dummy);CHKERRQ(ierr); 431 ierr = MatSeqAIJRestoreArrayRead(B,&dummy);CHKERRQ(ierr); 432 } 433 #endif 434 aa = a->a; 435 ba = b->a; 436 for (i=0; i<m; i++) { 437 if (im[i] < 0) continue; 438 if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 439 if (im[i] >= rstart && im[i] < rend) { 440 row = im[i] - rstart; 441 lastcol1 = -1; 442 rp1 = aj + ai[row]; 443 ap1 = aa + ai[row]; 444 rmax1 = aimax[row]; 445 nrow1 = ailen[row]; 446 low1 = 0; 447 high1 = nrow1; 448 lastcol2 = -1; 449 rp2 = bj + bi[row]; 450 ap2 = ba + bi[row]; 451 rmax2 = bimax[row]; 452 nrow2 = bilen[row]; 453 low2 = 0; 454 high2 = nrow2; 455 456 for (j=0; j<n; j++) { 457 if (v) value = roworiented ? v[i*n+j] : v[i+j*m]; 458 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 459 if (in[j] >= cstart && in[j] < cend) { 460 col = in[j] - cstart; 461 nonew = a->nonew; 462 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 463 #if defined(PETSC_HAVE_DEVICE) 464 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 465 #endif 466 } else if (in[j] < 0) continue; 467 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 468 else { 469 if (mat->was_assembled) { 470 if (!aij->colmap) { 471 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 472 } 473 #if defined(PETSC_USE_CTABLE) 474 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 475 col--; 476 #else 477 col = aij->colmap[in[j]] - 1; 478 #endif 479 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 480 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 481 col = in[j]; 482 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 483 B = aij->B; 484 b = (Mat_SeqAIJ*)B->data; 485 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 486 rp2 = bj + bi[row]; 487 ap2 = ba + bi[row]; 488 rmax2 = bimax[row]; 489 nrow2 = bilen[row]; 490 low2 = 0; 491 high2 = nrow2; 492 bm = aij->B->rmap->n; 493 ba = b->a; 494 inserted = PETSC_FALSE; 495 } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 496 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 497 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 498 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 499 } 500 } else col = in[j]; 501 nonew = b->nonew; 502 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 503 #if defined(PETSC_HAVE_DEVICE) 504 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 505 #endif 506 } 507 } 508 } else { 509 if (mat->nooffprocentries) 
SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 510 if (!aij->donotstash) { 511 mat->assembled = PETSC_FALSE; 512 if (roworiented) { 513 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 514 } else { 515 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 516 } 517 } 518 } 519 } 520 PetscFunctionReturn(0); 521 } 522 523 /* 524 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 525 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 526 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 527 */ 528 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[]) 529 { 530 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 531 Mat A = aij->A; /* diagonal part of the matrix */ 532 Mat B = aij->B; /* offdiagonal part of the matrix */ 533 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 534 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 535 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col; 536 PetscInt *ailen = a->ilen,*aj = a->j; 537 PetscInt *bilen = b->ilen,*bj = b->j; 538 PetscInt am = aij->A->rmap->n,j; 539 PetscInt diag_so_far = 0,dnz; 540 PetscInt offd_so_far = 0,onz; 541 542 PetscFunctionBegin; 543 /* Iterate over all rows of the matrix */ 544 for (j=0; j<am; j++) { 545 dnz = onz = 0; 546 /* Iterate over all non-zero columns of the current row */ 547 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 548 /* If column is in the diagonal */ 549 if (mat_j[col] >= cstart && mat_j[col] < cend) { 550 aj[diag_so_far++] = mat_j[col] - cstart; 551 dnz++; 552 } else { /* off-diagonal entries */ 553 bj[offd_so_far++] = mat_j[col]; 554 onz++; 555 } 556 } 557 ailen[j] = dnz; 558 bilen[j] = onz; 559 } 560 PetscFunctionReturn(0); 561 } 562 563 /* 564 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 565 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 566 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 567 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 568 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 569 */ 570 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[]) 571 { 572 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 573 Mat A = aij->A; /* diagonal part of the matrix */ 574 Mat B = aij->B; /* offdiagonal part of the matrix */ 575 Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data; 576 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 577 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 578 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 579 PetscInt *ailen = a->ilen,*aj = a->j; 580 PetscInt *bilen = b->ilen,*bj = b->j; 581 PetscInt am = aij->A->rmap->n,j; 582 PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. 
*/ 583 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 584 PetscScalar *aa = a->a,*ba = b->a; 585 586 PetscFunctionBegin; 587 /* Iterate over all rows of the matrix */ 588 for (j=0; j<am; j++) { 589 dnz_row = onz_row = 0; 590 rowstart_offd = full_offd_i[j]; 591 rowstart_diag = full_diag_i[j]; 592 /* Iterate over all non-zero columns of the current row */ 593 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 594 /* If column is in the diagonal */ 595 if (mat_j[col] >= cstart && mat_j[col] < cend) { 596 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 597 aa[rowstart_diag+dnz_row] = mat_a[col]; 598 dnz_row++; 599 } else { /* off-diagonal entries */ 600 bj[rowstart_offd+onz_row] = mat_j[col]; 601 ba[rowstart_offd+onz_row] = mat_a[col]; 602 onz_row++; 603 } 604 } 605 ailen[j] = dnz_row; 606 bilen[j] = onz_row; 607 } 608 PetscFunctionReturn(0); 609 } 610 611 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 612 { 613 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 614 PetscErrorCode ierr; 615 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 616 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 617 618 PetscFunctionBegin; 619 for (i=0; i<m; i++) { 620 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 621 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 622 if (idxm[i] >= rstart && idxm[i] < rend) { 623 row = idxm[i] - rstart; 624 for (j=0; j<n; j++) { 625 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 626 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 627 if (idxn[j] >= cstart && idxn[j] < cend) { 628 col = idxn[j] - cstart; 629 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 630 } else { 631 if (!aij->colmap) { 632 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 633 } 634 #if defined(PETSC_USE_CTABLE) 635 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 636 col--; 637 #else 638 col = aij->colmap[idxn[j]] - 1; 639 #endif 640 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 641 else { 642 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 643 } 644 } 645 } 646 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 647 } 648 PetscFunctionReturn(0); 649 } 650 651 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 652 { 653 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 654 PetscErrorCode ierr; 655 PetscInt nstash,reallocs; 656 657 PetscFunctionBegin; 658 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 659 660 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 661 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 662 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 663 PetscFunctionReturn(0); 664 } 665 666 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 667 { 668 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 669 PetscErrorCode ierr; 670 PetscMPIInt n; 671 PetscInt i,j,rstart,ncols,flg; 672 PetscInt *row,*col; 673 PetscBool other_disassembled; 674 PetscScalar *val; 675 676 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be 
reset in disassembly */ 677 678 PetscFunctionBegin; 679 if (!aij->donotstash && !mat->nooffprocentries) { 680 while (1) { 681 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 682 if (!flg) break; 683 684 for (i=0; i<n;) { 685 /* Now identify the consecutive vals belonging to the same row */ 686 for (j=i,rstart=row[j]; j<n; j++) { 687 if (row[j] != rstart) break; 688 } 689 if (j < n) ncols = j-i; 690 else ncols = n-i; 691 /* Now assemble all these values with a single function call */ 692 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 693 i = j; 694 } 695 } 696 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 697 } 698 #if defined(PETSC_HAVE_DEVICE) 699 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 700 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 701 if (mat->boundtocpu) { 702 ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr); 703 ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr); 704 } 705 #endif 706 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 707 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 708 709 /* determine if any processor has disassembled, if so we must 710 also disassemble ourself, in order that we may reassemble. */ 711 /* 712 if nonzero structure of submatrix B cannot change then we know that 713 no processor disassembled thus we can skip this stuff 714 */ 715 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 716 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 717 if (mat->was_assembled && !other_disassembled) { 718 #if defined(PETSC_HAVE_DEVICE) 719 aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */ 720 #endif 721 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 722 } 723 } 724 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 725 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 726 } 727 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 728 #if defined(PETSC_HAVE_DEVICE) 729 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 730 #endif 731 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 732 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 733 734 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 735 736 aij->rowvalues = NULL; 737 738 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 739 740 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 741 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 742 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 743 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 744 } 745 #if defined(PETSC_HAVE_DEVICE) 746 mat->offloadmask = PETSC_OFFLOAD_BOTH; 747 #endif 748 PetscFunctionReturn(0); 749 } 750 751 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 752 { 753 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 754 PetscErrorCode ierr; 755 756 PetscFunctionBegin; 757 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 758 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 759 PetscFunctionReturn(0); 760 } 761 762 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const 
PetscInt rows[],PetscScalar diag,Vec x,Vec b) 763 { 764 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 765 PetscObjectState sA, sB; 766 PetscInt *lrows; 767 PetscInt r, len; 768 PetscBool cong, lch, gch; 769 PetscErrorCode ierr; 770 771 PetscFunctionBegin; 772 /* get locally owned rows */ 773 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 774 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 775 /* fix right hand side if needed */ 776 if (x && b) { 777 const PetscScalar *xx; 778 PetscScalar *bb; 779 780 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 781 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 782 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 783 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 784 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 785 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 786 } 787 788 sA = mat->A->nonzerostate; 789 sB = mat->B->nonzerostate; 790 791 if (diag != 0.0 && cong) { 792 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 793 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 794 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 795 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 796 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 797 PetscInt nnwA, nnwB; 798 PetscBool nnzA, nnzB; 799 800 nnwA = aijA->nonew; 801 nnwB = aijB->nonew; 802 nnzA = aijA->keepnonzeropattern; 803 nnzB = aijB->keepnonzeropattern; 804 if (!nnzA) { 805 ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr); 806 aijA->nonew = 0; 807 } 808 if (!nnzB) { 809 ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr); 810 aijB->nonew = 0; 811 } 812 /* Must zero here before the next loop */ 813 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 814 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 815 for (r = 0; r < len; ++r) { 816 const PetscInt row = lrows[r] + A->rmap->rstart; 817 if (row >= A->cmap->N) continue; 818 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 819 } 820 aijA->nonew = nnwA; 821 aijB->nonew = nnwB; 822 } else { 823 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 824 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 825 } 826 ierr = PetscFree(lrows);CHKERRQ(ierr); 827 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 828 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 829 830 /* reduce nonzerostate */ 831 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 832 ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 833 if (gch) A->nonzerostate++; 834 PetscFunctionReturn(0); 835 } 836 837 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 838 { 839 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 840 PetscErrorCode ierr; 841 PetscMPIInt n = A->rmap->n; 842 PetscInt i,j,r,m,len = 0; 843 PetscInt *lrows,*owners = A->rmap->range; 844 PetscMPIInt p = 0; 845 PetscSFNode *rrows; 846 PetscSF sf; 847 const PetscScalar *xx; 848 PetscScalar *bb,*mask; 849 Vec xmask,lmask; 850 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 851 const PetscInt 
*aj, *ii,*ridx; 852 PetscScalar *aa; 853 854 PetscFunctionBegin; 855 /* Create SF where leaves are input rows and roots are owned rows */ 856 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 857 for (r = 0; r < n; ++r) lrows[r] = -1; 858 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 859 for (r = 0; r < N; ++r) { 860 const PetscInt idx = rows[r]; 861 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 862 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 863 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 864 } 865 rrows[r].rank = p; 866 rrows[r].index = rows[r] - owners[p]; 867 } 868 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 869 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 870 /* Collect flags for rows to be zeroed */ 871 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 872 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 873 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 874 /* Compress and put in row numbers */ 875 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 876 /* zero diagonal part of matrix */ 877 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 878 /* handle off diagonal part of matrix */ 879 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 880 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 881 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 882 for (i=0; i<len; i++) bb[lrows[i]] = 1; 883 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 884 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 885 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 886 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 887 if (x && b) { /* this code is buggy when the row and column layout don't match */ 888 PetscBool cong; 889 890 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 891 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 892 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 893 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 894 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 895 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 896 } 897 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 898 /* remove zeroed rows of off diagonal matrix */ 899 ii = aij->i; 900 for (i=0; i<len; i++) { 901 ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr); 902 } 903 /* loop over all elements of off process part of matrix zeroing removed columns*/ 904 if (aij->compressedrow.use) { 905 m = aij->compressedrow.nrows; 906 ii = aij->compressedrow.i; 907 ridx = aij->compressedrow.rindex; 908 for (i=0; i<m; i++) { 909 n = ii[i+1] - ii[i]; 910 aj = aij->j + ii[i]; 911 aa = aij->a + ii[i]; 912 913 for (j=0; j<n; j++) { 914 if (PetscAbsScalar(mask[*aj])) { 915 if (b) bb[*ridx] -= *aa*xx[*aj]; 916 *aa = 0.0; 917 } 918 aa++; 919 aj++; 920 } 921 ridx++; 922 } 923 } else { /* do not use compressed row format */ 924 m = l->B->rmap->n; 925 for (i=0; i<m; i++) { 926 n = ii[i+1] - ii[i]; 927 aj = aij->j + ii[i]; 928 aa = aij->a + ii[i]; 929 for (j=0; j<n; j++) { 930 if (PetscAbsScalar(mask[*aj])) { 931 if (b) bb[i] -= *aa*xx[*aj]; 932 *aa = 0.0; 933 } 934 aa++; 935 aj++; 936 } 937 
} 938 } 939 if (x && b) { 940 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 941 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 942 } 943 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 944 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 945 ierr = PetscFree(lrows);CHKERRQ(ierr); 946 947 /* only change matrix nonzero state if pattern was allowed to be changed */ 948 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 949 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 950 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 951 } 952 PetscFunctionReturn(0); 953 } 954 955 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 956 { 957 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 958 PetscErrorCode ierr; 959 PetscInt nt; 960 VecScatter Mvctx = a->Mvctx; 961 962 PetscFunctionBegin; 963 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 964 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 965 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 966 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 967 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 968 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 969 PetscFunctionReturn(0); 970 } 971 972 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 973 { 974 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 975 PetscErrorCode ierr; 976 977 PetscFunctionBegin; 978 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 979 PetscFunctionReturn(0); 980 } 981 982 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 983 { 984 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 985 PetscErrorCode ierr; 986 VecScatter Mvctx = a->Mvctx; 987 988 PetscFunctionBegin; 989 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 990 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 991 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 992 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 993 PetscFunctionReturn(0); 994 } 995 996 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 997 { 998 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 999 PetscErrorCode ierr; 1000 1001 PetscFunctionBegin; 1002 /* do nondiagonal part */ 1003 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1004 /* do local part */ 1005 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1006 /* add partial results together */ 1007 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1008 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1009 PetscFunctionReturn(0); 1010 } 1011 1012 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1013 { 1014 MPI_Comm comm; 1015 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1016 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1017 IS Me,Notme; 1018 PetscErrorCode ierr; 1019 PetscInt M,N,first,last,*notme,i; 1020 PetscBool lf; 1021 PetscMPIInt size; 1022 1023 PetscFunctionBegin; 1024 /* Easy test: symmetric diagonal block */ 1025 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1026 ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 1027 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRMPI(ierr); 1028 if (!*f) PetscFunctionReturn(0); 1029 ierr = 
PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1030 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 1031 if (size == 1) PetscFunctionReturn(0); 1032 1033 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1034 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1035 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1036 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1037 for (i=0; i<first; i++) notme[i] = i; 1038 for (i=last; i<M; i++) notme[i-last+first] = i; 1039 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1040 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1041 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1042 Aoff = Aoffs[0]; 1043 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1044 Boff = Boffs[0]; 1045 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1046 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1047 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1048 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1049 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1050 ierr = PetscFree(notme);CHKERRQ(ierr); 1051 PetscFunctionReturn(0); 1052 } 1053 1054 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1055 { 1056 PetscErrorCode ierr; 1057 1058 PetscFunctionBegin; 1059 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1060 PetscFunctionReturn(0); 1061 } 1062 1063 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1064 { 1065 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1066 PetscErrorCode ierr; 1067 1068 PetscFunctionBegin; 1069 /* do nondiagonal part */ 1070 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1071 /* do local part */ 1072 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1073 /* add partial results together */ 1074 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1075 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1076 PetscFunctionReturn(0); 1077 } 1078 1079 /* 1080 This only works correctly for square matrices where the subblock A->A is the 1081 diagonal block 1082 */ 1083 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1084 { 1085 PetscErrorCode ierr; 1086 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1087 1088 PetscFunctionBegin; 1089 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1090 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1091 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1092 PetscFunctionReturn(0); 1093 } 1094 1095 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1096 { 1097 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1098 PetscErrorCode ierr; 1099 1100 PetscFunctionBegin; 1101 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1102 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1103 PetscFunctionReturn(0); 1104 } 1105 1106 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1107 { 1108 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1109 PetscErrorCode ierr; 1110 1111 PetscFunctionBegin; 1112 #if defined(PETSC_USE_LOG) 1113 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1114 #endif 1115 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1116 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1117 ierr = 
MatDestroy(&aij->A);CHKERRQ(ierr); 1118 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1119 #if defined(PETSC_USE_CTABLE) 1120 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1121 #else 1122 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1123 #endif 1124 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1125 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1126 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1127 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1128 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1129 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1130 1131 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 1132 ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr); 1133 1134 ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr); 1135 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1136 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1137 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1138 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1139 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1140 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1141 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1142 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr); 1143 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1144 #if defined(PETSC_HAVE_CUDA) 1145 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr); 1146 #endif 1147 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 1148 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr); 1149 #endif 1150 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 1151 #if defined(PETSC_HAVE_ELEMENTAL) 1152 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1153 #endif 1154 #if defined(PETSC_HAVE_SCALAPACK) 1155 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr); 1156 #endif 1157 #if defined(PETSC_HAVE_HYPRE) 1158 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1159 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1160 #endif 1161 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1162 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr); 1163 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1164 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr); 1165 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr); 1166 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr); 1167 #if defined(PETSC_HAVE_MKL_SPARSE) 1168 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr); 1169 #endif 1170 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr); 1171 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1172 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr); 1173 PetscFunctionReturn(0); 1174 } 1175 1176 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1177 { 1178 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1179 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1180 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1181 const PetscInt *garray = aij->garray; 1182 const PetscScalar *aa,*ba; 1183 PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb; 1184 PetscInt *rowlens; 1185 PetscInt *colidxs; 1186 PetscScalar *matvals; 1187 PetscErrorCode ierr; 1188 1189 PetscFunctionBegin; 1190 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1191 1192 M = mat->rmap->N; 1193 N = mat->cmap->N; 1194 m = mat->rmap->n; 1195 rs = mat->rmap->rstart; 1196 cs = mat->cmap->rstart; 1197 nz = A->nz + B->nz; 1198 1199 /* write matrix header */ 1200 header[0] = MAT_FILE_CLASSID; 1201 header[1] = M; header[2] = N; header[3] = nz; 1202 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1203 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1204 1205 /* fill in and store row lengths */ 1206 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1207 for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1208 ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr); 1209 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1210 1211 /* fill in and store column indices */ 1212 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1213 for (cnt=0, i=0; i<m; i++) { 1214 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1215 if (garray[B->j[jb]] > cs) break; 1216 colidxs[cnt++] = garray[B->j[jb]]; 1217 } 1218 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1219 colidxs[cnt++] = A->j[ja] + cs; 1220 for (; jb<B->i[i+1]; jb++) 1221 colidxs[cnt++] = garray[B->j[jb]]; 1222 } 1223 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1224 ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 1225 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1226 1227 /* fill in and store nonzero values */ 1228 ierr = MatSeqAIJGetArrayRead(aij->A,&aa);CHKERRQ(ierr); 1229 ierr = MatSeqAIJGetArrayRead(aij->B,&ba);CHKERRQ(ierr); 1230 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1231 for (cnt=0, i=0; i<m; i++) { 1232 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1233 if (garray[B->j[jb]] > cs) break; 1234 matvals[cnt++] = ba[jb]; 1235 } 1236 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1237 matvals[cnt++] = aa[ja]; 1238 for (; jb<B->i[i+1]; jb++) 1239 matvals[cnt++] = ba[jb]; 1240 } 1241 ierr = MatSeqAIJRestoreArrayRead(aij->A,&aa);CHKERRQ(ierr); 1242 ierr = MatSeqAIJRestoreArrayRead(aij->B,&ba);CHKERRQ(ierr); 1243 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1244 ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 1245 ierr = PetscFree(matvals);CHKERRQ(ierr); 1246 1247 /* write block size option to the viewer's .info file */ 1248 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1249 PetscFunctionReturn(0); 
1250 } 1251 1252 #include <petscdraw.h> 1253 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1254 { 1255 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1256 PetscErrorCode ierr; 1257 PetscMPIInt rank = aij->rank,size = aij->size; 1258 PetscBool isdraw,iascii,isbinary; 1259 PetscViewer sviewer; 1260 PetscViewerFormat format; 1261 1262 PetscFunctionBegin; 1263 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1264 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1265 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1266 if (iascii) { 1267 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1268 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1269 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1270 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1271 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1272 for (i=0; i<(PetscInt)size; i++) { 1273 nmax = PetscMax(nmax,nz[i]); 1274 nmin = PetscMin(nmin,nz[i]); 1275 navg += nz[i]; 1276 } 1277 ierr = PetscFree(nz);CHKERRQ(ierr); 1278 navg = navg/size; 1279 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1280 PetscFunctionReturn(0); 1281 } 1282 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1283 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1284 MatInfo info; 1285 PetscInt *inodes=NULL; 1286 1287 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr); 1288 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1289 ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr); 1290 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1291 if (!inodes) { 1292 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1293 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1294 } else { 1295 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1296 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1297 } 1298 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1299 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1300 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1301 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1302 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1303 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1304 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1305 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1306 PetscFunctionReturn(0); 1307 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1308 PetscInt inodecount,inodelimit,*inodes; 1309 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1310 if (inodes) { 1311 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1312 } else { 1313 ierr = 
PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1314 } 1315 PetscFunctionReturn(0); 1316 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1317 PetscFunctionReturn(0); 1318 } 1319 } else if (isbinary) { 1320 if (size == 1) { 1321 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1322 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1323 } else { 1324 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1325 } 1326 PetscFunctionReturn(0); 1327 } else if (iascii && size == 1) { 1328 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1329 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1330 PetscFunctionReturn(0); 1331 } else if (isdraw) { 1332 PetscDraw draw; 1333 PetscBool isnull; 1334 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1335 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1336 if (isnull) PetscFunctionReturn(0); 1337 } 1338 1339 { /* assemble the entire matrix onto first processor */ 1340 Mat A = NULL, Av; 1341 IS isrow,iscol; 1342 1343 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1344 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1345 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1346 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1347 /* The commented code uses MatCreateSubMatrices instead */ 1348 /* 1349 Mat *AA, A = NULL, Av; 1350 IS isrow,iscol; 1351 1352 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1353 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1354 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1355 if (!rank) { 1356 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1357 A = AA[0]; 1358 Av = AA[0]; 1359 } 1360 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1361 */ 1362 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1363 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1364 /* 1365 Everyone has to call to draw the matrix since the graphics waits are 1366 synchronized across all processors that share the PetscDraw object 1367 */ 1368 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1369 if (!rank) { 1370 if (((PetscObject)mat)->name) { 1371 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1372 } 1373 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1374 } 1375 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1376 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1377 ierr = MatDestroy(&A);CHKERRQ(ierr); 1378 } 1379 PetscFunctionReturn(0); 1380 } 1381 1382 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1383 { 1384 PetscErrorCode ierr; 1385 PetscBool iascii,isdraw,issocket,isbinary; 1386 1387 PetscFunctionBegin; 1388 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1389 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1390 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1391 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1392 if (iascii || isdraw || isbinary || issocket) { 1393 ierr = 
MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1394 } 1395 PetscFunctionReturn(0); 1396 } 1397 1398 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1399 { 1400 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1401 PetscErrorCode ierr; 1402 Vec bb1 = NULL; 1403 PetscBool hasop; 1404 1405 PetscFunctionBegin; 1406 if (flag == SOR_APPLY_UPPER) { 1407 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1408 PetscFunctionReturn(0); 1409 } 1410 1411 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1412 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1413 } 1414 1415 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1416 if (flag & SOR_ZERO_INITIAL_GUESS) { 1417 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1418 its--; 1419 } 1420 1421 while (its--) { 1422 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1423 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1424 1425 /* update rhs: bb1 = bb - B*x */ 1426 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1427 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1428 1429 /* local sweep */ 1430 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1431 } 1432 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1433 if (flag & SOR_ZERO_INITIAL_GUESS) { 1434 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1435 its--; 1436 } 1437 while (its--) { 1438 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1439 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1440 1441 /* update rhs: bb1 = bb - B*x */ 1442 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1443 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1444 1445 /* local sweep */ 1446 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1447 } 1448 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1449 if (flag & SOR_ZERO_INITIAL_GUESS) { 1450 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1451 its--; 1452 } 1453 while (its--) { 1454 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1455 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1456 1457 /* update rhs: bb1 = bb - B*x */ 1458 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1459 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1460 1461 /* local sweep */ 1462 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1463 } 1464 } else if (flag & SOR_EISENSTAT) { 1465 Vec xx1; 1466 1467 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1468 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1469 1470 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1471 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1472 if (!mat->diag) { 1473 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1474 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1475 } 1476 ierr = 
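         /* Eisenstat trick: use the type-provided diagonal-block multiply when one exists;
            otherwise fall back to pointwise multiplication with the cached diagonal vector */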
MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1477 if (hasop) { 1478 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1479 } else { 1480 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1481 } 1482 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1483 1484 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1485 1486 /* local sweep */ 1487 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1488 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1489 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1490 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1491 1492 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1493 1494 matin->factorerrortype = mat->A->factorerrortype; 1495 PetscFunctionReturn(0); 1496 } 1497 1498 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1499 { 1500 Mat aA,aB,Aperm; 1501 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1502 PetscScalar *aa,*ba; 1503 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1504 PetscSF rowsf,sf; 1505 IS parcolp = NULL; 1506 PetscBool done; 1507 PetscErrorCode ierr; 1508 1509 PetscFunctionBegin; 1510 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1511 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1512 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1513 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1514 1515 /* Invert row permutation to find out where my rows should go */ 1516 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1517 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1518 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1519 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1520 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1521 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1522 1523 /* Invert column permutation to find out where my columns should go */ 1524 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1525 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1526 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1527 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1528 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1529 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1530 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1531 1532 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1533 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1534 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1535 1536 /* Find out where my gcols should go */ 1537 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1538 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1539 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1540 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1541 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1542 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr); 1543 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr); 1544 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1545 1546 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1547 ierr = 
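  /* First pass: for each local row, count how many permuted entries fall in the diagonal block
     (row and column owned by the same process) versus the off-diagonal block, then broadcast the
     counts through rowsf so the new owners of the permuted rows can preallocate */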
MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1548 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1549 for (i=0; i<m; i++) { 1550 PetscInt row = rdest[i]; 1551 PetscMPIInt rowner; 1552 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1553 for (j=ai[i]; j<ai[i+1]; j++) { 1554 PetscInt col = cdest[aj[j]]; 1555 PetscMPIInt cowner; 1556 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1557 if (rowner == cowner) dnnz[i]++; 1558 else onnz[i]++; 1559 } 1560 for (j=bi[i]; j<bi[i+1]; j++) { 1561 PetscInt col = gcdest[bj[j]]; 1562 PetscMPIInt cowner; 1563 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1564 if (rowner == cowner) dnnz[i]++; 1565 else onnz[i]++; 1566 } 1567 } 1568 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr); 1569 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr); 1570 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr); 1571 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr); 1572 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1573 1574 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1575 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1576 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1577 for (i=0; i<m; i++) { 1578 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1579 PetscInt j0,rowlen; 1580 rowlen = ai[i+1] - ai[i]; 1581 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1582 for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1583 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1584 } 1585 rowlen = bi[i+1] - bi[i]; 1586 for (j0=j=0; j<rowlen; j0=j) { 1587 for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1588 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1589 } 1590 } 1591 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1592 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1593 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1594 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1595 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1596 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1597 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1598 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1599 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1600 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1601 *B = Aperm; 1602 PetscFunctionReturn(0); 1603 } 1604 1605 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1606 { 1607 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1608 PetscErrorCode ierr; 1609 1610 PetscFunctionBegin; 1611 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1612 if (ghosts) *ghosts = aij->garray; 1613 PetscFunctionReturn(0); 1614 } 1615 1616 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1617 { 1618 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1619 Mat A = mat->A,B = mat->B; 1620 PetscErrorCode ierr; 1621 PetscLogDouble isend[5],irecv[5]; 1622 1623 PetscFunctionBegin; 1624 info->block_size = 1.0; 1625 ierr = 
MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1626 1627 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1628 isend[3] = info->memory; isend[4] = info->mallocs; 1629 1630 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1631 1632 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1633 isend[3] += info->memory; isend[4] += info->mallocs; 1634 if (flag == MAT_LOCAL) { 1635 info->nz_used = isend[0]; 1636 info->nz_allocated = isend[1]; 1637 info->nz_unneeded = isend[2]; 1638 info->memory = isend[3]; 1639 info->mallocs = isend[4]; 1640 } else if (flag == MAT_GLOBAL_MAX) { 1641 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr); 1642 1643 info->nz_used = irecv[0]; 1644 info->nz_allocated = irecv[1]; 1645 info->nz_unneeded = irecv[2]; 1646 info->memory = irecv[3]; 1647 info->mallocs = irecv[4]; 1648 } else if (flag == MAT_GLOBAL_SUM) { 1649 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr); 1650 1651 info->nz_used = irecv[0]; 1652 info->nz_allocated = irecv[1]; 1653 info->nz_unneeded = irecv[2]; 1654 info->memory = irecv[3]; 1655 info->mallocs = irecv[4]; 1656 } 1657 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1658 info->fill_ratio_needed = 0; 1659 info->factor_mallocs = 0; 1660 PetscFunctionReturn(0); 1661 } 1662 1663 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1664 { 1665 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1666 PetscErrorCode ierr; 1667 1668 PetscFunctionBegin; 1669 switch (op) { 1670 case MAT_NEW_NONZERO_LOCATIONS: 1671 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1672 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1673 case MAT_KEEP_NONZERO_PATTERN: 1674 case MAT_NEW_NONZERO_LOCATION_ERR: 1675 case MAT_USE_INODES: 1676 case MAT_IGNORE_ZERO_ENTRIES: 1677 case MAT_FORM_EXPLICIT_TRANSPOSE: 1678 MatCheckPreallocated(A,1); 1679 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1680 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1681 break; 1682 case MAT_ROW_ORIENTED: 1683 MatCheckPreallocated(A,1); 1684 a->roworiented = flg; 1685 1686 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1687 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1688 break; 1689 case MAT_FORCE_DIAGONAL_ENTRIES: 1690 case MAT_SORTED_FULL: 1691 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1692 break; 1693 case MAT_IGNORE_OFF_PROC_ENTRIES: 1694 a->donotstash = flg; 1695 break; 1696 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1697 case MAT_SPD: 1698 case MAT_SYMMETRIC: 1699 case MAT_STRUCTURALLY_SYMMETRIC: 1700 case MAT_HERMITIAN: 1701 case MAT_SYMMETRY_ETERNAL: 1702 break; 1703 case MAT_SUBMAT_SINGLEIS: 1704 A->submat_singleis = flg; 1705 break; 1706 case MAT_STRUCTURE_ONLY: 1707 /* The option is handled directly by MatSetOption() */ 1708 break; 1709 default: 1710 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1711 } 1712 PetscFunctionReturn(0); 1713 } 1714 1715 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1716 { 1717 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1718 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1719 PetscErrorCode ierr; 1720 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1721 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1722 PetscInt *cmap,*idx_p; 1723 1724 
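  /*
     This routine merges the requested row of the diagonal (A) and off-diagonal (B) blocks into a
     single row with increasing global column numbers.  A minimal caller-side sketch, assuming an
     assembled MATMPIAIJ `mat` (names are illustrative only); only locally owned rows may be
     requested, and each row must be restored before the next one is fetched:

        PetscInt          row,rstart,rend,ncols;
        const PetscInt    *cols;
        const PetscScalar *vals;

        ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
        for (row=rstart; row<rend; row++) {
          ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
          ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
        }
  */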
PetscFunctionBegin; 1725 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1726 mat->getrowactive = PETSC_TRUE; 1727 1728 if (!mat->rowvalues && (idx || v)) { 1729 /* 1730 allocate enough space to hold information from the longest row. 1731 */ 1732 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1733 PetscInt max = 1,tmp; 1734 for (i=0; i<matin->rmap->n; i++) { 1735 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1736 if (max < tmp) max = tmp; 1737 } 1738 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1739 } 1740 1741 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1742 lrow = row - rstart; 1743 1744 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1745 if (!v) {pvA = NULL; pvB = NULL;} 1746 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1747 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1748 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1749 nztot = nzA + nzB; 1750 1751 cmap = mat->garray; 1752 if (v || idx) { 1753 if (nztot) { 1754 /* Sort by increasing column numbers, assuming A and B already sorted */ 1755 PetscInt imark = -1; 1756 if (v) { 1757 *v = v_p = mat->rowvalues; 1758 for (i=0; i<nzB; i++) { 1759 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1760 else break; 1761 } 1762 imark = i; 1763 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1764 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1765 } 1766 if (idx) { 1767 *idx = idx_p = mat->rowindices; 1768 if (imark > -1) { 1769 for (i=0; i<imark; i++) { 1770 idx_p[i] = cmap[cworkB[i]]; 1771 } 1772 } else { 1773 for (i=0; i<nzB; i++) { 1774 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1775 else break; 1776 } 1777 imark = i; 1778 } 1779 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1780 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1781 } 1782 } else { 1783 if (idx) *idx = NULL; 1784 if (v) *v = NULL; 1785 } 1786 } 1787 *nz = nztot; 1788 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1789 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1790 PetscFunctionReturn(0); 1791 } 1792 1793 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1794 { 1795 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1796 1797 PetscFunctionBegin; 1798 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1799 aij->getrowactive = PETSC_FALSE; 1800 PetscFunctionReturn(0); 1801 } 1802 1803 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1804 { 1805 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1806 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1807 PetscErrorCode ierr; 1808 PetscInt i,j,cstart = mat->cmap->rstart; 1809 PetscReal sum = 0.0; 1810 MatScalar *v; 1811 1812 PetscFunctionBegin; 1813 if (aij->size == 1) { 1814 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1815 } else { 1816 if (type == NORM_FROBENIUS) { 1817 v = amat->a; 1818 for (i=0; i<amat->nz; i++) { 1819 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1820 } 1821 v = bmat->a; 1822 for (i=0; i<bmat->nz; i++) { 1823 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1824 } 1825 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1826 *norm = PetscSqrtReal(*norm); 1827 ierr = 
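      /* one multiply and one add per stored nonzero of A and B */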
PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr); 1828 } else if (type == NORM_1) { /* max column norm */ 1829 PetscReal *tmp,*tmp2; 1830 PetscInt *jj,*garray = aij->garray; 1831 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1832 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1833 *norm = 0.0; 1834 v = amat->a; jj = amat->j; 1835 for (j=0; j<amat->nz; j++) { 1836 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1837 } 1838 v = bmat->a; jj = bmat->j; 1839 for (j=0; j<bmat->nz; j++) { 1840 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1841 } 1842 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1843 for (j=0; j<mat->cmap->N; j++) { 1844 if (tmp2[j] > *norm) *norm = tmp2[j]; 1845 } 1846 ierr = PetscFree(tmp);CHKERRQ(ierr); 1847 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1848 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1849 } else if (type == NORM_INFINITY) { /* max row norm */ 1850 PetscReal ntemp = 0.0; 1851 for (j=0; j<aij->A->rmap->n; j++) { 1852 v = amat->a + amat->i[j]; 1853 sum = 0.0; 1854 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1855 sum += PetscAbsScalar(*v); v++; 1856 } 1857 v = bmat->a + bmat->i[j]; 1858 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1859 sum += PetscAbsScalar(*v); v++; 1860 } 1861 if (sum > ntemp) ntemp = sum; 1862 } 1863 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1864 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1865 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1866 } 1867 PetscFunctionReturn(0); 1868 } 1869 1870 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1871 { 1872 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 1873 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 1874 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 1875 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 1876 PetscErrorCode ierr; 1877 Mat B,A_diag,*B_diag; 1878 const MatScalar *pbv,*bv; 1879 1880 PetscFunctionBegin; 1881 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1882 ai = Aloc->i; aj = Aloc->j; 1883 bi = Bloc->i; bj = Bloc->j; 1884 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1885 PetscInt *d_nnz,*g_nnz,*o_nnz; 1886 PetscSFNode *oloc; 1887 PETSC_UNUSED PetscSF sf; 1888 1889 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1890 /* compute d_nnz for preallocation */ 1891 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 1892 for (i=0; i<ai[ma]; i++) { 1893 d_nnz[aj[i]]++; 1894 } 1895 /* compute local off-diagonal contributions */ 1896 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 1897 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1898 /* map those to global */ 1899 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1900 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1901 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1902 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 1903 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1904 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1905 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1906 1907 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1908 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1909 ierr = 
MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1910 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1911 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1912 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1913 } else { 1914 B = *matout; 1915 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1916 } 1917 1918 b = (Mat_MPIAIJ*)B->data; 1919 A_diag = a->A; 1920 B_diag = &b->A; 1921 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 1922 A_diag_ncol = A_diag->cmap->N; 1923 B_diag_ilen = sub_B_diag->ilen; 1924 B_diag_i = sub_B_diag->i; 1925 1926 /* Set ilen for diagonal of B */ 1927 for (i=0; i<A_diag_ncol; i++) { 1928 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 1929 } 1930 1931 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 1932 very quickly (=without using MatSetValues), because all writes are local. */ 1933 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 1934 1935 /* copy over the B part */ 1936 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 1937 ierr = MatSeqAIJGetArrayRead(a->B,&bv);CHKERRQ(ierr); 1938 pbv = bv; 1939 row = A->rmap->rstart; 1940 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1941 cols_tmp = cols; 1942 for (i=0; i<mb; i++) { 1943 ncol = bi[i+1]-bi[i]; 1944 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES);CHKERRQ(ierr); 1945 row++; 1946 pbv += ncol; cols_tmp += ncol; 1947 } 1948 ierr = PetscFree(cols);CHKERRQ(ierr); 1949 ierr = MatSeqAIJRestoreArrayRead(a->B,&bv);CHKERRQ(ierr); 1950 1951 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1952 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1953 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1954 *matout = B; 1955 } else { 1956 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 1957 } 1958 PetscFunctionReturn(0); 1959 } 1960 1961 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1962 { 1963 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1964 Mat a = aij->A,b = aij->B; 1965 PetscErrorCode ierr; 1966 PetscInt s1,s2,s3; 1967 1968 PetscFunctionBegin; 1969 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1970 if (rr) { 1971 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1972 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1973 /* Overlap communication with computation. 
*/ 1974 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1975 } 1976 if (ll) { 1977 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1978 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1979 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1980 } 1981 /* scale the diagonal block */ 1982 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1983 1984 if (rr) { 1985 /* Do a scatter end and then right scale the off-diagonal block */ 1986 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1987 ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr); 1988 } 1989 PetscFunctionReturn(0); 1990 } 1991 1992 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1993 { 1994 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1995 PetscErrorCode ierr; 1996 1997 PetscFunctionBegin; 1998 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1999 PetscFunctionReturn(0); 2000 } 2001 2002 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2003 { 2004 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2005 Mat a,b,c,d; 2006 PetscBool flg; 2007 PetscErrorCode ierr; 2008 2009 PetscFunctionBegin; 2010 a = matA->A; b = matA->B; 2011 c = matB->A; d = matB->B; 2012 2013 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2014 if (flg) { 2015 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2016 } 2017 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 2018 PetscFunctionReturn(0); 2019 } 2020 2021 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2022 { 2023 PetscErrorCode ierr; 2024 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2025 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2026 2027 PetscFunctionBegin; 2028 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2029 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2030 /* because of the column compression in the off-processor part of the matrix a->B, 2031 the number of columns in a->B and b->B may be different, hence we cannot call 2032 the MatCopy() directly on the two parts. If need be, we can provide a more 2033 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2034 then copying the submatrices */ 2035 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2036 } else { 2037 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2038 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2039 } 2040 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2041 PetscFunctionReturn(0); 2042 } 2043 2044 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2045 { 2046 PetscErrorCode ierr; 2047 2048 PetscFunctionBegin; 2049 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr); 2050 PetscFunctionReturn(0); 2051 } 2052 2053 /* 2054 Computes the number of nonzeros per row needed for preallocation when X and Y 2055 have different nonzero structure. 
2056 */ 2057 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2058 { 2059 PetscInt i,j,k,nzx,nzy; 2060 2061 PetscFunctionBegin; 2062 /* Set the number of nonzeros in the new matrix */ 2063 for (i=0; i<m; i++) { 2064 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2065 nzx = xi[i+1] - xi[i]; 2066 nzy = yi[i+1] - yi[i]; 2067 nnz[i] = 0; 2068 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2069 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2070 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2071 nnz[i]++; 2072 } 2073 for (; k<nzy; k++) nnz[i]++; 2074 } 2075 PetscFunctionReturn(0); 2076 } 2077 2078 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2079 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2080 { 2081 PetscErrorCode ierr; 2082 PetscInt m = Y->rmap->N; 2083 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2084 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2085 2086 PetscFunctionBegin; 2087 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2088 PetscFunctionReturn(0); 2089 } 2090 2091 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2092 { 2093 PetscErrorCode ierr; 2094 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2095 2096 PetscFunctionBegin; 2097 if (str == SAME_NONZERO_PATTERN) { 2098 ierr = MatAXPY(yy->A,a,xx->A,str);CHKERRQ(ierr); 2099 ierr = MatAXPY(yy->B,a,xx->B,str);CHKERRQ(ierr); 2100 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2101 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2102 } else { 2103 Mat B; 2104 PetscInt *nnz_d,*nnz_o; 2105 2106 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2107 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2108 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2109 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2110 ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr); 2111 ierr = MatSetType(B,((PetscObject)Y)->type_name);CHKERRQ(ierr); 2112 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2113 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2114 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2115 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2116 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2117 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2118 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2119 } 2120 PetscFunctionReturn(0); 2121 } 2122 2123 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2124 2125 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2126 { 2127 #if defined(PETSC_USE_COMPLEX) 2128 PetscErrorCode ierr; 2129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2130 2131 PetscFunctionBegin; 2132 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2133 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2134 #else 2135 PetscFunctionBegin; 2136 #endif 2137 PetscFunctionReturn(0); 2138 } 2139 2140 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2141 { 2142 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2143 PetscErrorCode ierr; 2144 2145 PetscFunctionBegin; 2146 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2147 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2148 
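  /* MatConjugate_MPIAIJ(), MatRealPart_MPIAIJ() and MatImaginaryPart_MPIAIJ() only modify the stored
     numerical values, never the nonzero pattern or the parallel layout, so forwarding to the
     sequential diagonal (A) and off-diagonal (B) blocks is all that is needed */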
PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*idxb = NULL,m = A->rmap->n;
  PetscScalar       *va,*vv;
  Vec               vB,vA;
  const PetscScalar *vb;

  PetscFunctionBegin;
  ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
  ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);

  ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<m; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
  ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);
  ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);

  ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
  ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      vv[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    } else {
      vv[i] = va[i];
      if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
        idx[i] = a->garray[idxb[i]];
    }
  }
  ierr = VecRestoreArrayWrite(v,&vv);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vA);CHKERRQ(ierr);
  ierr = VecDestroy(&vB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  PetscErrorCode    ierr;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When one process holds the entire A and the other processes have no entries */
  if (A->cmap->N == n) {
    ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
    ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr);
    ierr = VecDestroy(&diagV);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (n == 0) {
    if (m) {
      ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
      for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
      ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr);

  /* Get offdiagIdx[] for implicit 0.0 */
  ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
  ba = bav;
  bi = b->i;
  bj = b->j;
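  /*
     The off-diagonal block B stores only its nonzero columns in compressed form, so a row of B can
     have implicit 0.0 entries in global columns it does not store.  The loop below therefore tracks,
     for each row, both the smallest stored value in absolute value and the global column of the first
     implicitly zero entry, so that a row whose off-diagonal part is not completely dense correctly
     reports 0.0 as its off-diagonal minimum in absolute value.
  */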
2250 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2251 for (r = 0; r < m; r++) { 2252 ncols = bi[r+1] - bi[r]; 2253 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2254 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2255 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2256 offdiagA[r] = 0.0; 2257 2258 /* Find first hole in the cmap */ 2259 for (j=0; j<ncols; j++) { 2260 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2261 if (col > j && j < cstart) { 2262 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2263 break; 2264 } else if (col > j + n && j >= cstart) { 2265 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2266 break; 2267 } 2268 } 2269 if (j == ncols && ncols < A->cmap->N - n) { 2270 /* a hole is outside compressed Bcols */ 2271 if (ncols == 0) { 2272 if (cstart) { 2273 offdiagIdx[r] = 0; 2274 } else offdiagIdx[r] = cend; 2275 } else { /* ncols > 0 */ 2276 offdiagIdx[r] = cmap[ncols-1] + 1; 2277 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2278 } 2279 } 2280 } 2281 2282 for (j=0; j<ncols; j++) { 2283 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2284 ba++; bj++; 2285 } 2286 } 2287 2288 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2289 ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2290 for (r = 0; r < m; ++r) { 2291 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2292 a[r] = diagA[r]; 2293 if (idx) idx[r] = cstart + diagIdx[r]; 2294 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2295 a[r] = diagA[r]; 2296 if (idx) { 2297 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2298 idx[r] = cstart + diagIdx[r]; 2299 } else idx[r] = offdiagIdx[r]; 2300 } 2301 } else { 2302 a[r] = offdiagA[r]; 2303 if (idx) idx[r] = offdiagIdx[r]; 2304 } 2305 } 2306 ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr); 2307 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2308 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2309 ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2310 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2311 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2312 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2313 PetscFunctionReturn(0); 2314 } 2315 2316 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2317 { 2318 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2319 PetscInt m = A->rmap->n,n = A->cmap->n; 2320 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2321 PetscInt *cmap = mat->garray; 2322 PetscInt *diagIdx, *offdiagIdx; 2323 Vec diagV, offdiagV; 2324 PetscScalar *a, *diagA, *offdiagA; 2325 const PetscScalar *ba,*bav; 2326 PetscInt r,j,col,ncols,*bi,*bj; 2327 PetscErrorCode ierr; 2328 Mat B = mat->B; 2329 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2330 2331 PetscFunctionBegin; 2332 /* When a process holds entire A and other processes have no entry */ 2333 if (A->cmap->N == n) { 2334 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2335 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2336 ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr); 2337 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2338 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2339 PetscFunctionReturn(0); 2340 } else if (n == 0) { 2341 if (m) { 2342 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2343 for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;} 2344 ierr = 
VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2345 } 2346 PetscFunctionReturn(0); 2347 } 2348 2349 ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2350 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2351 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2352 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2353 2354 /* Get offdiagIdx[] for implicit 0.0 */ 2355 ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr); 2356 ba = bav; 2357 bi = b->i; 2358 bj = b->j; 2359 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2360 for (r = 0; r < m; r++) { 2361 ncols = bi[r+1] - bi[r]; 2362 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2363 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2364 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2365 offdiagA[r] = 0.0; 2366 2367 /* Find first hole in the cmap */ 2368 for (j=0; j<ncols; j++) { 2369 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2370 if (col > j && j < cstart) { 2371 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2372 break; 2373 } else if (col > j + n && j >= cstart) { 2374 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2375 break; 2376 } 2377 } 2378 if (j == ncols && ncols < A->cmap->N - n) { 2379 /* a hole is outside compressed Bcols */ 2380 if (ncols == 0) { 2381 if (cstart) { 2382 offdiagIdx[r] = 0; 2383 } else offdiagIdx[r] = cend; 2384 } else { /* ncols > 0 */ 2385 offdiagIdx[r] = cmap[ncols-1] + 1; 2386 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2387 } 2388 } 2389 } 2390 2391 for (j=0; j<ncols; j++) { 2392 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2393 ba++; bj++; 2394 } 2395 } 2396 2397 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2398 ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2399 for (r = 0; r < m; ++r) { 2400 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2401 a[r] = diagA[r]; 2402 if (idx) idx[r] = cstart + diagIdx[r]; 2403 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2404 a[r] = diagA[r]; 2405 if (idx) { 2406 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2407 idx[r] = cstart + diagIdx[r]; 2408 } else idx[r] = offdiagIdx[r]; 2409 } 2410 } else { 2411 a[r] = offdiagA[r]; 2412 if (idx) idx[r] = offdiagIdx[r]; 2413 } 2414 } 2415 ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr); 2416 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2417 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2418 ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2419 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2420 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2421 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2422 PetscFunctionReturn(0); 2423 } 2424 2425 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2426 { 2427 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 2428 PetscInt m = A->rmap->n,n = A->cmap->n; 2429 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2430 PetscInt *cmap = mat->garray; 2431 PetscInt *diagIdx, *offdiagIdx; 2432 Vec diagV, offdiagV; 2433 PetscScalar *a, *diagA, *offdiagA; 2434 const PetscScalar *ba,*bav; 2435 PetscInt r,j,col,ncols,*bi,*bj; 2436 PetscErrorCode ierr; 2437 Mat B = mat->B; 2438 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2439 2440 PetscFunctionBegin; 2441 /* When a process holds entire A and other processes have no entry */ 2442 if (A->cmap->N == n) { 2443 ierr = 
VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2444 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2445 ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr); 2446 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2447 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2448 PetscFunctionReturn(0); 2449 } else if (n == 0) { 2450 if (m) { 2451 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2452 for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;} 2453 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2454 } 2455 PetscFunctionReturn(0); 2456 } 2457 2458 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2459 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2460 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2461 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2462 2463 /* Get offdiagIdx[] for implicit 0.0 */ 2464 ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr); 2465 ba = bav; 2466 bi = b->i; 2467 bj = b->j; 2468 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2469 for (r = 0; r < m; r++) { 2470 ncols = bi[r+1] - bi[r]; 2471 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2472 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2473 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2474 offdiagA[r] = 0.0; 2475 2476 /* Find first hole in the cmap */ 2477 for (j=0; j<ncols; j++) { 2478 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2479 if (col > j && j < cstart) { 2480 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2481 break; 2482 } else if (col > j + n && j >= cstart) { 2483 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2484 break; 2485 } 2486 } 2487 if (j == ncols && ncols < A->cmap->N - n) { 2488 /* a hole is outside compressed Bcols */ 2489 if (ncols == 0) { 2490 if (cstart) { 2491 offdiagIdx[r] = 0; 2492 } else offdiagIdx[r] = cend; 2493 } else { /* ncols > 0 */ 2494 offdiagIdx[r] = cmap[ncols-1] + 1; 2495 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2496 } 2497 } 2498 } 2499 2500 for (j=0; j<ncols; j++) { 2501 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2502 ba++; bj++; 2503 } 2504 } 2505 2506 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2507 ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr); 2508 for (r = 0; r < m; ++r) { 2509 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2510 a[r] = diagA[r]; 2511 if (idx) idx[r] = cstart + diagIdx[r]; 2512 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2513 a[r] = diagA[r]; 2514 if (idx) { 2515 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2516 idx[r] = cstart + diagIdx[r]; 2517 } else idx[r] = offdiagIdx[r]; 2518 } 2519 } else { 2520 a[r] = offdiagA[r]; 2521 if (idx) idx[r] = offdiagIdx[r]; 2522 } 2523 } 2524 ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr); 2525 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2526 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2527 ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr); 2528 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2529 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2530 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2531 PetscFunctionReturn(0); 2532 } 2533 2534 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2535 { 2536 PetscErrorCode ierr; 2537 Mat *dummy; 2538 2539 PetscFunctionBegin; 2540 ierr = 
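         /* gather the nonzero structure of the whole parallel matrix onto every process as a
            sequential matrix; MAT_DO_NOT_GET_VALUES skips communicating the numerical values */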
MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2541 *newmat = *dummy; 2542 ierr = PetscFree(dummy);CHKERRQ(ierr); 2543 PetscFunctionReturn(0); 2544 } 2545 2546 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2547 { 2548 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2549 PetscErrorCode ierr; 2550 2551 PetscFunctionBegin; 2552 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2553 A->factorerrortype = a->A->factorerrortype; 2554 PetscFunctionReturn(0); 2555 } 2556 2557 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2558 { 2559 PetscErrorCode ierr; 2560 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2561 2562 PetscFunctionBegin; 2563 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2564 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2565 if (x->assembled) { 2566 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2567 } else { 2568 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2569 } 2570 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2571 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2572 PetscFunctionReturn(0); 2573 } 2574 2575 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2576 { 2577 PetscFunctionBegin; 2578 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2579 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2580 PetscFunctionReturn(0); 2581 } 2582 2583 /*@ 2584 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2585 2586 Collective on Mat 2587 2588 Input Parameters: 2589 + A - the matrix 2590 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2591 2592 Level: advanced 2593 2594 @*/ 2595 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2596 { 2597 PetscErrorCode ierr; 2598 2599 PetscFunctionBegin; 2600 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2601 PetscFunctionReturn(0); 2602 } 2603 2604 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2605 { 2606 PetscErrorCode ierr; 2607 PetscBool sc = PETSC_FALSE,flg; 2608 2609 PetscFunctionBegin; 2610 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2611 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2612 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2613 if (flg) { 2614 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2615 } 2616 ierr = PetscOptionsTail();CHKERRQ(ierr); 2617 PetscFunctionReturn(0); 2618 } 2619 2620 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2621 { 2622 PetscErrorCode ierr; 2623 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2624 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2625 2626 PetscFunctionBegin; 2627 if (!Y->preallocated) { 2628 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2629 } else if (!aij->nz) { 2630 PetscInt nonew = aij->nonew; 2631 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2632 aij->nonew = nonew; 2633 } 2634 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2635 
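  /* MatShift_Basic() adds a to every diagonal entry through MatSetValues(), so the diagonal block
     needs room for at least one entry per local row; the two branches above provide that room for a
     not-yet-preallocated or currently empty matrix (restoring the saved `nonew` flag so the caller's
     new-nonzero error settings are kept) */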
PetscFunctionReturn(0); 2636 } 2637 2638 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2639 { 2640 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2641 PetscErrorCode ierr; 2642 2643 PetscFunctionBegin; 2644 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2645 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2646 if (d) { 2647 PetscInt rstart; 2648 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2649 *d += rstart; 2650 2651 } 2652 PetscFunctionReturn(0); 2653 } 2654 2655 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2656 { 2657 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2658 PetscErrorCode ierr; 2659 2660 PetscFunctionBegin; 2661 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2662 PetscFunctionReturn(0); 2663 } 2664 2665 /* -------------------------------------------------------------------*/ 2666 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2667 MatGetRow_MPIAIJ, 2668 MatRestoreRow_MPIAIJ, 2669 MatMult_MPIAIJ, 2670 /* 4*/ MatMultAdd_MPIAIJ, 2671 MatMultTranspose_MPIAIJ, 2672 MatMultTransposeAdd_MPIAIJ, 2673 NULL, 2674 NULL, 2675 NULL, 2676 /*10*/ NULL, 2677 NULL, 2678 NULL, 2679 MatSOR_MPIAIJ, 2680 MatTranspose_MPIAIJ, 2681 /*15*/ MatGetInfo_MPIAIJ, 2682 MatEqual_MPIAIJ, 2683 MatGetDiagonal_MPIAIJ, 2684 MatDiagonalScale_MPIAIJ, 2685 MatNorm_MPIAIJ, 2686 /*20*/ MatAssemblyBegin_MPIAIJ, 2687 MatAssemblyEnd_MPIAIJ, 2688 MatSetOption_MPIAIJ, 2689 MatZeroEntries_MPIAIJ, 2690 /*24*/ MatZeroRows_MPIAIJ, 2691 NULL, 2692 NULL, 2693 NULL, 2694 NULL, 2695 /*29*/ MatSetUp_MPIAIJ, 2696 NULL, 2697 NULL, 2698 MatGetDiagonalBlock_MPIAIJ, 2699 NULL, 2700 /*34*/ MatDuplicate_MPIAIJ, 2701 NULL, 2702 NULL, 2703 NULL, 2704 NULL, 2705 /*39*/ MatAXPY_MPIAIJ, 2706 MatCreateSubMatrices_MPIAIJ, 2707 MatIncreaseOverlap_MPIAIJ, 2708 MatGetValues_MPIAIJ, 2709 MatCopy_MPIAIJ, 2710 /*44*/ MatGetRowMax_MPIAIJ, 2711 MatScale_MPIAIJ, 2712 MatShift_MPIAIJ, 2713 MatDiagonalSet_MPIAIJ, 2714 MatZeroRowsColumns_MPIAIJ, 2715 /*49*/ MatSetRandom_MPIAIJ, 2716 NULL, 2717 NULL, 2718 NULL, 2719 NULL, 2720 /*54*/ MatFDColoringCreate_MPIXAIJ, 2721 NULL, 2722 MatSetUnfactored_MPIAIJ, 2723 MatPermute_MPIAIJ, 2724 NULL, 2725 /*59*/ MatCreateSubMatrix_MPIAIJ, 2726 MatDestroy_MPIAIJ, 2727 MatView_MPIAIJ, 2728 NULL, 2729 NULL, 2730 /*64*/ NULL, 2731 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2732 NULL, 2733 NULL, 2734 NULL, 2735 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2736 MatGetRowMinAbs_MPIAIJ, 2737 NULL, 2738 NULL, 2739 NULL, 2740 NULL, 2741 /*75*/ MatFDColoringApply_AIJ, 2742 MatSetFromOptions_MPIAIJ, 2743 NULL, 2744 NULL, 2745 MatFindZeroDiagonals_MPIAIJ, 2746 /*80*/ NULL, 2747 NULL, 2748 NULL, 2749 /*83*/ MatLoad_MPIAIJ, 2750 MatIsSymmetric_MPIAIJ, 2751 NULL, 2752 NULL, 2753 NULL, 2754 NULL, 2755 /*89*/ NULL, 2756 NULL, 2757 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2758 NULL, 2759 NULL, 2760 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2761 NULL, 2762 NULL, 2763 NULL, 2764 MatBindToCPU_MPIAIJ, 2765 /*99*/ MatProductSetFromOptions_MPIAIJ, 2766 NULL, 2767 NULL, 2768 MatConjugate_MPIAIJ, 2769 NULL, 2770 /*104*/MatSetValuesRow_MPIAIJ, 2771 MatRealPart_MPIAIJ, 2772 MatImaginaryPart_MPIAIJ, 2773 NULL, 2774 NULL, 2775 /*109*/NULL, 2776 NULL, 2777 MatGetRowMin_MPIAIJ, 2778 NULL, 2779 MatMissingDiagonal_MPIAIJ, 2780 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2781 NULL, 2782 MatGetGhosts_MPIAIJ, 2783 NULL, 2784 NULL, 2785 /*119*/MatMultDiagonalBlock_MPIAIJ, 2786 
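                                       /* A NULL slot means the operation is not provided by MATMPIAIJ
                                          (MatHasOperation() then typically reports PETSC_FALSE); the
                                          numeric markers record the slot index in struct _MatOps so the
                                          table stays aligned with petsc/private/matimpl.h */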
NULL, 2787 NULL, 2788 NULL, 2789 MatGetMultiProcBlock_MPIAIJ, 2790 /*124*/MatFindNonzeroRows_MPIAIJ, 2791 MatGetColumnNorms_MPIAIJ, 2792 MatInvertBlockDiagonal_MPIAIJ, 2793 MatInvertVariableBlockDiagonal_MPIAIJ, 2794 MatCreateSubMatricesMPI_MPIAIJ, 2795 /*129*/NULL, 2796 NULL, 2797 NULL, 2798 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2799 NULL, 2800 /*134*/NULL, 2801 NULL, 2802 NULL, 2803 NULL, 2804 NULL, 2805 /*139*/MatSetBlockSizes_MPIAIJ, 2806 NULL, 2807 NULL, 2808 MatFDColoringSetUp_MPIXAIJ, 2809 MatFindOffBlockDiagonalEntries_MPIAIJ, 2810 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2811 /*145*/NULL, 2812 NULL, 2813 NULL 2814 }; 2815 2816 /* ----------------------------------------------------------------------------------------*/ 2817 2818 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2819 { 2820 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2821 PetscErrorCode ierr; 2822 2823 PetscFunctionBegin; 2824 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2825 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2826 PetscFunctionReturn(0); 2827 } 2828 2829 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2830 { 2831 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2832 PetscErrorCode ierr; 2833 2834 PetscFunctionBegin; 2835 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2836 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2837 PetscFunctionReturn(0); 2838 } 2839 2840 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2841 { 2842 Mat_MPIAIJ *b; 2843 PetscErrorCode ierr; 2844 PetscMPIInt size; 2845 2846 PetscFunctionBegin; 2847 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2848 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2849 b = (Mat_MPIAIJ*)B->data; 2850 2851 #if defined(PETSC_USE_CTABLE) 2852 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2853 #else 2854 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2855 #endif 2856 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2857 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2858 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2859 2860 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2861 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 2862 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2863 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2864 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 2865 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2866 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2867 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2868 2869 if (!B->preallocated) { 2870 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2871 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2872 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2873 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2874 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2875 } 2876 2877 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2878 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2879 B->preallocated = PETSC_TRUE; 2880 B->was_assembled = PETSC_FALSE; 2881 B->assembled = PETSC_FALSE; 2882 PetscFunctionReturn(0); 2883 } 2884 2885 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2886 { 2887 Mat_MPIAIJ *b; 2888 PetscErrorCode ierr; 2889 2890 PetscFunctionBegin; 2891 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2892 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2893 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2894 b = (Mat_MPIAIJ*)B->data; 2895 2896 #if defined(PETSC_USE_CTABLE) 2897 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2898 #else 2899 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2900 #endif 2901 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2902 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2903 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2904 2905 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2906 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2907 B->preallocated = PETSC_TRUE; 2908 B->was_assembled = PETSC_FALSE; 2909 B->assembled = PETSC_FALSE; 2910 PetscFunctionReturn(0); 2911 } 2912 2913 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2914 { 2915 Mat mat; 2916 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2917 PetscErrorCode ierr; 2918 2919 PetscFunctionBegin; 2920 *newmat = NULL; 2921 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2922 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2923 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2924 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2925 a = (Mat_MPIAIJ*)mat->data; 2926 2927 mat->factortype = matin->factortype; 2928 mat->assembled = matin->assembled; 2929 mat->insertmode = NOT_SET_VALUES; 2930 mat->preallocated = matin->preallocated; 2931 2932 a->size = oldmat->size; 2933 a->rank = oldmat->rank; 2934 a->donotstash = oldmat->donotstash; 2935 a->roworiented = oldmat->roworiented; 2936 a->rowindices = NULL; 2937 a->rowvalues = NULL; 2938 a->getrowactive = PETSC_FALSE; 2939 2940 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2941 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2942 2943 if (oldmat->colmap) { 2944 #if defined(PETSC_USE_CTABLE) 2945 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2946 #else 2947 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2948 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2949 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 2950 #endif 2951 } else a->colmap = NULL; 2952 if (oldmat->garray) { 2953 PetscInt len; 2954 len = oldmat->B->cmap->n; 2955 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2956 
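  /* garray[] maps the compressed (local) column indices of the off-diagonal block B to global
     column indices; it is copied verbatim because the duplicate uses the same column compression
     as the original matrix */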
ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2957 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 2958 } else a->garray = NULL; 2959 2960 /* It may happen MatDuplicate is called with a non-assembled matrix 2961 In fact, MatDuplicate only requires the matrix to be preallocated 2962 This may happen inside a DMCreateMatrix_Shell */ 2963 if (oldmat->lvec) { 2964 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2965 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2966 } 2967 if (oldmat->Mvctx) { 2968 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2969 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2970 } 2971 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2972 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2973 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2974 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2975 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2976 *newmat = mat; 2977 PetscFunctionReturn(0); 2978 } 2979 2980 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2981 { 2982 PetscBool isbinary, ishdf5; 2983 PetscErrorCode ierr; 2984 2985 PetscFunctionBegin; 2986 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2987 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2988 /* force binary viewer to load .info file if it has not yet done so */ 2989 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2990 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2991 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2992 if (isbinary) { 2993 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 2994 } else if (ishdf5) { 2995 #if defined(PETSC_HAVE_HDF5) 2996 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 2997 #else 2998 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2999 #endif 3000 } else { 3001 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 3002 } 3003 PetscFunctionReturn(0); 3004 } 3005 3006 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3007 { 3008 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 3009 PetscInt *rowidxs,*colidxs; 3010 PetscScalar *matvals; 3011 PetscErrorCode ierr; 3012 3013 PetscFunctionBegin; 3014 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3015 3016 /* read in matrix header */ 3017 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3018 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3019 M = header[1]; N = header[2]; nz = header[3]; 3020 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 3021 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N); 3022 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 3023 3024 /* set block sizes from the viewer's .info file */ 3025 ierr = 
MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3026 /* set global sizes if not set already */ 3027 if (mat->rmap->N < 0) mat->rmap->N = M; 3028 if (mat->cmap->N < 0) mat->cmap->N = N; 3029 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3030 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3031 3032 /* check if the matrix sizes are correct */ 3033 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3034 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 3035 3036 /* read in row lengths and build row indices */ 3037 ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr); 3038 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3039 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 3040 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3041 ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr); 3042 if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 3043 /* read in column indices and matrix values */ 3044 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 3045 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 3046 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 3047 /* store matrix indices and values */ 3048 ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr); 3049 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3050 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3051 PetscFunctionReturn(0); 3052 } 3053 3054 /* Not scalable because of ISAllGather() unless getting all columns. 
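   (Each process ends up holding a copy of the full gathered column index set, so per-process memory grows
   with the global number of selected columns; the all-columns case handled below skips the gather entirely.)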
*/ 3055 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3056 { 3057 PetscErrorCode ierr; 3058 IS iscol_local; 3059 PetscBool isstride; 3060 PetscMPIInt lisstride=0,gisstride; 3061 3062 PetscFunctionBegin; 3063 /* check if we are grabbing all columns*/ 3064 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3065 3066 if (isstride) { 3067 PetscInt start,len,mstart,mlen; 3068 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3069 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3070 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3071 if (mstart == start && mlen-mstart == len) lisstride = 1; 3072 } 3073 3074 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 3075 if (gisstride) { 3076 PetscInt N; 3077 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3078 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr); 3079 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3080 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3081 } else { 3082 PetscInt cbs; 3083 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3084 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3085 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3086 } 3087 3088 *isseq = iscol_local; 3089 PetscFunctionReturn(0); 3090 } 3091 3092 /* 3093 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3094 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3095 3096 Input Parameters: 3097 mat - matrix 3098 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3099 i.e., mat->rstart <= isrow[i] < mat->rend 3100 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3101 i.e., mat->cstart <= iscol[i] < mat->cend 3102 Output Parameter: 3103 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3104 iscol_o - sequential column index set for retrieving mat->B 3105 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3106 */ 3107 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3108 { 3109 PetscErrorCode ierr; 3110 Vec x,cmap; 3111 const PetscInt *is_idx; 3112 PetscScalar *xarray,*cmaparray; 3113 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3114 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3115 Mat B=a->B; 3116 Vec lvec=a->lvec,lcmap; 3117 PetscInt i,cstart,cend,Bn=B->cmap->N; 3118 MPI_Comm comm; 3119 VecScatter Mvctx=a->Mvctx; 3120 3121 PetscFunctionBegin; 3122 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3123 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3124 3125 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3126 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3127 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3128 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3129 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3130 3131 /* Get start indices */ 3132 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3133 isstart -= ncols; 3134 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3135 3136 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3137 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3138 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3139 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3140 for (i=0; i<ncols; i++) { 3141 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3142 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3143 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3144 } 3145 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3146 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3147 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3148 3149 /* Get iscol_d */ 3150 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3151 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3152 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3153 3154 /* Get isrow_d */ 3155 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3156 rstart = mat->rmap->rstart; 3157 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3158 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3159 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3160 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3161 3162 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3163 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3164 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3165 3166 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3167 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3168 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3169 3170 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3171 3172 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3173 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3174 3175 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3176 /* off-process column indices */ 3177 count = 0; 3178 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3179 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3180 3181 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3182 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3183 for (i=0; i<Bn; i++) { 3184 if (PetscRealPart(xarray[i]) > -1.0) { 3185 idx[count] = i; /* local column index in off-diagonal part B */ 3186 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3187 count++; 3188 } 3189 } 3190 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3191 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3192 3193 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3194 /* cannot ensure iscol_o has same blocksize as iscol! 
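      (the selected off-diagonal columns need not form complete blocks, so no block size is set on iscol_o)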
*/ 3195 3196 ierr = PetscFree(idx);CHKERRQ(ierr); 3197 *garray = cmap1; 3198 3199 ierr = VecDestroy(&x);CHKERRQ(ierr); 3200 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3201 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3202 PetscFunctionReturn(0); 3203 } 3204 3205 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3206 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3207 { 3208 PetscErrorCode ierr; 3209 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3210 Mat M = NULL; 3211 MPI_Comm comm; 3212 IS iscol_d,isrow_d,iscol_o; 3213 Mat Asub = NULL,Bsub = NULL; 3214 PetscInt n; 3215 3216 PetscFunctionBegin; 3217 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3218 3219 if (call == MAT_REUSE_MATRIX) { 3220 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3221 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3222 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3223 3224 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3225 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3226 3227 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3228 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3229 3230 /* Update diagonal and off-diagonal portions of submat */ 3231 asub = (Mat_MPIAIJ*)(*submat)->data; 3232 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3233 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3234 if (n) { 3235 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3236 } 3237 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3238 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3239 3240 } else { /* call == MAT_INITIAL_MATRIX) */ 3241 const PetscInt *garray; 3242 PetscInt BsubN; 3243 3244 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
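       These are built by ISGetSeqIS_SameColDist_Private() and composed onto the new submatrix below so that a
       later MAT_REUSE_MATRIX call can retrieve them.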
*/ 3245 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3246 3247 /* Create local submatrices Asub and Bsub */ 3248 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3249 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3250 3251 /* Create submatrix M */ 3252 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3253 3254 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3255 asub = (Mat_MPIAIJ*)M->data; 3256 3257 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3258 n = asub->B->cmap->N; 3259 if (BsubN > n) { 3260 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3261 const PetscInt *idx; 3262 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3263 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3264 3265 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3266 j = 0; 3267 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3268 for (i=0; i<n; i++) { 3269 if (j >= BsubN) break; 3270 while (subgarray[i] > garray[j]) j++; 3271 3272 if (subgarray[i] == garray[j]) { 3273 idx_new[i] = idx[j++]; 3274 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3275 } 3276 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3277 3278 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3279 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3280 3281 } else if (BsubN < n) { 3282 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3283 } 3284 3285 ierr = PetscFree(garray);CHKERRQ(ierr); 3286 *submat = M; 3287 3288 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3289 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3290 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3291 3292 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3293 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3294 3295 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3296 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3297 } 3298 PetscFunctionReturn(0); 3299 } 3300 3301 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3302 { 3303 PetscErrorCode ierr; 3304 IS iscol_local=NULL,isrow_d; 3305 PetscInt csize; 3306 PetscInt n,i,j,start,end; 3307 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3308 MPI_Comm comm; 3309 3310 PetscFunctionBegin; 3311 /* If isrow has same processor distribution as mat, 3312 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3313 if (call == MAT_REUSE_MATRIX) { 3314 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3315 if (isrow_d) { 3316 sameRowDist = PETSC_TRUE; 3317 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3318 } else { 3319 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3320 if (iscol_local) { 3321 sameRowDist = PETSC_TRUE; 3322 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3323 } 3324 } 3325 } else { 3326 /* Check if isrow has same processor distribution as mat */ 3327 sameDist[0] = 
PETSC_FALSE; 3328 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3329 if (!n) { 3330 sameDist[0] = PETSC_TRUE; 3331 } else { 3332 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3333 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3334 if (i >= start && j < end) { 3335 sameDist[0] = PETSC_TRUE; 3336 } 3337 } 3338 3339 /* Check if iscol has same processor distribution as mat */ 3340 sameDist[1] = PETSC_FALSE; 3341 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3342 if (!n) { 3343 sameDist[1] = PETSC_TRUE; 3344 } else { 3345 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3346 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3347 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3348 } 3349 3350 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3351 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRMPI(ierr); 3352 sameRowDist = tsameDist[0]; 3353 } 3354 3355 if (sameRowDist) { 3356 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3357 /* isrow and iscol have same processor distribution as mat */ 3358 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3359 PetscFunctionReturn(0); 3360 } else { /* sameRowDist */ 3361 /* isrow has same processor distribution as mat */ 3362 if (call == MAT_INITIAL_MATRIX) { 3363 PetscBool sorted; 3364 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3365 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3366 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3367 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3368 3369 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3370 if (sorted) { 3371 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3372 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3373 PetscFunctionReturn(0); 3374 } 3375 } else { /* call == MAT_REUSE_MATRIX */ 3376 IS iscol_sub; 3377 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3378 if (iscol_sub) { 3379 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3380 PetscFunctionReturn(0); 3381 } 3382 } 3383 } 3384 } 3385 3386 /* General case: iscol -> iscol_local which has global size of iscol */ 3387 if (call == MAT_REUSE_MATRIX) { 3388 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3389 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3390 } else { 3391 if (!iscol_local) { 3392 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3393 } 3394 } 3395 3396 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3397 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3398 3399 if (call == MAT_INITIAL_MATRIX) { 3400 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3401 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3402 } 3403 PetscFunctionReturn(0); 3404 } 3405 3406 /*@C 3407 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3408 and "off-diagonal" part of the matrix in CSR format. 3409 3410 Collective 3411 3412 Input Parameters: 3413 + comm - MPI communicator 3414 . 
A - "diagonal" portion of matrix 3415 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3416 - garray - global index of B columns 3417 3418 Output Parameter: 3419 . mat - the matrix, with input A as its local diagonal matrix 3420 Level: advanced 3421 3422 Notes: 3423 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3424 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3425 3426 .seealso: MatCreateMPIAIJWithSplitArrays() 3427 @*/ 3428 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3429 { 3430 PetscErrorCode ierr; 3431 Mat_MPIAIJ *maij; 3432 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3433 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3434 const PetscScalar *oa; 3435 Mat Bnew; 3436 PetscInt m,n,N; 3437 3438 PetscFunctionBegin; 3439 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3440 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3441 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3442 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3443 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3444 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3445 3446 /* Get global columns of mat */ 3447 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3448 3449 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3450 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3451 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3452 maij = (Mat_MPIAIJ*)(*mat)->data; 3453 3454 (*mat)->preallocated = PETSC_TRUE; 3455 3456 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3457 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3458 3459 /* Set A as diagonal portion of *mat */ 3460 maij->A = A; 3461 3462 nz = oi[m]; 3463 for (i=0; i<nz; i++) { 3464 col = oj[i]; 3465 oj[i] = garray[col]; 3466 } 3467 3468 /* Set Bnew as off-diagonal portion of *mat */ 3469 ierr = MatSeqAIJGetArrayRead(B,&oa);CHKERRQ(ierr); 3470 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew);CHKERRQ(ierr); 3471 ierr = MatSeqAIJRestoreArrayRead(B,&oa);CHKERRQ(ierr); 3472 bnew = (Mat_SeqAIJ*)Bnew->data; 3473 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3474 maij->B = Bnew; 3475 3476 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3477 3478 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3479 b->free_a = PETSC_FALSE; 3480 b->free_ij = PETSC_FALSE; 3481 ierr = MatDestroy(&B);CHKERRQ(ierr); 3482 3483 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3484 bnew->free_a = PETSC_TRUE; 3485 bnew->free_ij = PETSC_TRUE; 3486 3487 /* condense columns of maij->B */ 3488 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3489 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3490 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3491 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3492 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3493 PetscFunctionReturn(0); 
3494 } 3495 3496 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3497 3498 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3499 { 3500 PetscErrorCode ierr; 3501 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3502 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3503 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3504 Mat M,Msub,B=a->B; 3505 MatScalar *aa; 3506 Mat_SeqAIJ *aij; 3507 PetscInt *garray = a->garray,*colsub,Ncols; 3508 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3509 IS iscol_sub,iscmap; 3510 const PetscInt *is_idx,*cmap; 3511 PetscBool allcolumns=PETSC_FALSE; 3512 MPI_Comm comm; 3513 3514 PetscFunctionBegin; 3515 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3516 if (call == MAT_REUSE_MATRIX) { 3517 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3518 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3519 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3520 3521 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3522 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3523 3524 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3525 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3526 3527 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3528 3529 } else { /* call == MAT_INITIAL_MATRIX) */ 3530 PetscBool flg; 3531 3532 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3533 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3534 3535 /* (1) iscol -> nonscalable iscol_local */ 3536 /* Check for special case: each processor gets entire matrix columns */ 3537 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3538 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3539 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 3540 if (allcolumns) { 3541 iscol_sub = iscol_local; 3542 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3543 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3544 3545 } else { 3546 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3547 PetscInt *idx,*cmap1,k; 3548 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3549 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3550 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3551 count = 0; 3552 k = 0; 3553 for (i=0; i<Ncols; i++) { 3554 j = is_idx[i]; 3555 if (j >= cstart && j < cend) { 3556 /* diagonal part of mat */ 3557 idx[count] = j; 3558 cmap1[count++] = i; /* column index in submat */ 3559 } else if (Bn) { 3560 /* off-diagonal part of mat */ 3561 if (j == garray[k]) { 3562 idx[count] = j; 3563 cmap1[count++] = i; /* column index in submat */ 3564 } else if (j > garray[k]) { 3565 while (j > garray[k] && k < Bn-1) k++; 3566 if (j == garray[k]) { 3567 idx[count] = j; 3568 cmap1[count++] = i; /* column index in submat */ 3569 } 3570 } 3571 } 3572 } 3573 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3574 3575 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3576 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3577 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3578 3579 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3580 } 3581 3582 /* (3) Create sequential Msub */ 3583 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3584 } 3585 3586 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3587 aij = (Mat_SeqAIJ*)(Msub)->data; 3588 ii = aij->i; 3589 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3590 3591 /* 3592 m - number of local rows 3593 Ncols - number of columns (same on all processors) 3594 rstart - first row in new global matrix generated 3595 */ 3596 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3597 3598 if (call == MAT_INITIAL_MATRIX) { 3599 /* (4) Create parallel newmat */ 3600 PetscMPIInt rank,size; 3601 PetscInt csize; 3602 3603 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 3604 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 3605 3606 /* 3607 Determine the number of non-zeros in the diagonal and off-diagonal 3608 portions of the matrix in order to do correct preallocation 3609 */ 3610 3611 /* first get start and end of "diagonal" columns */ 3612 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3613 if (csize == PETSC_DECIDE) { 3614 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3615 if (mglobal == Ncols) { /* square matrix */ 3616 nlocal = m; 3617 } else { 3618 nlocal = Ncols/size + ((Ncols % size) > rank); 3619 } 3620 } else { 3621 nlocal = csize; 3622 } 3623 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3624 rstart = rend - nlocal; 3625 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3626 3627 /* next, compute all the lengths */ 3628 jj = aij->j; 3629 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3630 olens = dlens + m; 3631 for (i=0; i<m; i++) { 3632 jend = ii[i+1] - ii[i]; 3633 olen = 0; 3634 dlen = 0; 3635 for (j=0; j<jend; j++) { 3636 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3637 else dlen++; 3638 jj++; 3639 } 3640 olens[i] = olen; 3641 dlens[i] = dlen; 3642 } 3643 3644 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3645 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3646 3647 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3648 ierr = 
MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 3649 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3650 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3651 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3652 ierr = PetscFree(dlens);CHKERRQ(ierr); 3653 3654 } else { /* call == MAT_REUSE_MATRIX */ 3655 M = *newmat; 3656 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3657 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3658 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3659 /* 3660 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3661 rather than the slower MatSetValues(). 3662 */ 3663 M->was_assembled = PETSC_TRUE; 3664 M->assembled = PETSC_FALSE; 3665 } 3666 3667 /* (5) Set values of Msub to *newmat */ 3668 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3669 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3670 3671 jj = aij->j; 3672 ierr = MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr); 3673 for (i=0; i<m; i++) { 3674 row = rstart + i; 3675 nz = ii[i+1] - ii[i]; 3676 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3677 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3678 jj += nz; aa += nz; 3679 } 3680 ierr = MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr); 3681 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3682 3683 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3684 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3685 3686 ierr = PetscFree(colsub);CHKERRQ(ierr); 3687 3688 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3689 if (call == MAT_INITIAL_MATRIX) { 3690 *newmat = M; 3691 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3692 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3693 3694 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3695 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3696 3697 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3698 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3699 3700 if (iscol_local) { 3701 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3702 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3703 } 3704 } 3705 PetscFunctionReturn(0); 3706 } 3707 3708 /* 3709 Not great since it makes two copies of the submatrix, first an SeqAIJ 3710 in local and then by concatenating the local matrices the end result. 3711 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3712 3713 Note: This requires a sequential iscol with all indices. 
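
   For illustration, the usual path that reaches this routine (see MatCreateSubMatrix_MPIAIJ() above) is roughly

      ISGetSeqIS_Private(mat,iscol,&iscol_local);                              every process gathers all requested columns
      MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);

   so iscol here lists every selected column on each process, which is why the memory use does not scale.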
3714 */ 3715 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3716 { 3717 PetscErrorCode ierr; 3718 PetscMPIInt rank,size; 3719 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3720 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3721 Mat M,Mreuse; 3722 MatScalar *aa,*vwork; 3723 MPI_Comm comm; 3724 Mat_SeqAIJ *aij; 3725 PetscBool colflag,allcolumns=PETSC_FALSE; 3726 3727 PetscFunctionBegin; 3728 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3729 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 3730 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 3731 3732 /* Check for special case: each processor gets entire matrix columns */ 3733 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3734 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3735 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3736 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 3737 3738 if (call == MAT_REUSE_MATRIX) { 3739 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3740 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3741 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3742 } else { 3743 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3744 } 3745 3746 /* 3747 m - number of local rows 3748 n - number of columns (same on all processors) 3749 rstart - first row in new global matrix generated 3750 */ 3751 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3752 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3753 if (call == MAT_INITIAL_MATRIX) { 3754 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3755 ii = aij->i; 3756 jj = aij->j; 3757 3758 /* 3759 Determine the number of non-zeros in the diagonal and off-diagonal 3760 portions of the matrix in order to do correct preallocation 3761 */ 3762 3763 /* first get start and end of "diagonal" columns */ 3764 if (csize == PETSC_DECIDE) { 3765 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3766 if (mglobal == n) { /* square matrix */ 3767 nlocal = m; 3768 } else { 3769 nlocal = n/size + ((n % size) > rank); 3770 } 3771 } else { 3772 nlocal = csize; 3773 } 3774 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3775 rstart = rend - nlocal; 3776 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3777 3778 /* next, compute all the lengths */ 3779 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3780 olens = dlens + m; 3781 for (i=0; i<m; i++) { 3782 jend = ii[i+1] - ii[i]; 3783 olen = 0; 3784 dlen = 0; 3785 for (j=0; j<jend; j++) { 3786 if (*jj < rstart || *jj >= rend) olen++; 3787 else dlen++; 3788 jj++; 3789 } 3790 olens[i] = olen; 3791 dlens[i] = dlen; 3792 } 3793 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3794 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3795 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3796 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3797 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3798 ierr = PetscFree(dlens);CHKERRQ(ierr); 3799 } else { 3800 PetscInt ml,nl; 3801 3802 M = *newmat; 3803 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3804 
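    /* with MAT_REUSE_MATRIX the caller must pass back a matrix with the same local row layout as the original call; this is checked below */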
if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3805 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3806 /* 3807 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3808 rather than the slower MatSetValues(). 3809 */ 3810 M->was_assembled = PETSC_TRUE; 3811 M->assembled = PETSC_FALSE; 3812 } 3813 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3814 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3815 ii = aij->i; 3816 jj = aij->j; 3817 3818 /* trigger copy to CPU if needed */ 3819 ierr = MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr); 3820 for (i=0; i<m; i++) { 3821 row = rstart + i; 3822 nz = ii[i+1] - ii[i]; 3823 cwork = jj; jj += nz; 3824 vwork = aa; aa += nz; 3825 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3826 } 3827 ierr = MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr); 3828 3829 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3830 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3831 *newmat = M; 3832 3833 /* save submatrix used in processor for next request */ 3834 if (call == MAT_INITIAL_MATRIX) { 3835 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3836 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3837 } 3838 PetscFunctionReturn(0); 3839 } 3840 3841 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3842 { 3843 PetscInt m,cstart, cend,j,nnz,i,d; 3844 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3845 const PetscInt *JJ; 3846 PetscErrorCode ierr; 3847 PetscBool nooffprocentries; 3848 3849 PetscFunctionBegin; 3850 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3851 3852 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3853 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3854 m = B->rmap->n; 3855 cstart = B->cmap->rstart; 3856 cend = B->cmap->rend; 3857 rstart = B->rmap->rstart; 3858 3859 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3860 3861 if (PetscDefined(USE_DEBUG)) { 3862 for (i=0; i<m; i++) { 3863 nnz = Ii[i+1]- Ii[i]; 3864 JJ = J + Ii[i]; 3865 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3866 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3867 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3868 } 3869 } 3870 3871 for (i=0; i<m; i++) { 3872 nnz = Ii[i+1]- Ii[i]; 3873 JJ = J + Ii[i]; 3874 nnz_max = PetscMax(nnz_max,nnz); 3875 d = 0; 3876 for (j=0; j<nnz; j++) { 3877 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3878 } 3879 d_nnz[i] = d; 3880 o_nnz[i] = nnz - d; 3881 } 3882 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3883 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3884 3885 for (i=0; i<m; i++) { 3886 ii = i + rstart; 3887 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? 
v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 3888 } 3889 nooffprocentries = B->nooffprocentries; 3890 B->nooffprocentries = PETSC_TRUE; 3891 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3892 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3893 B->nooffprocentries = nooffprocentries; 3894 3895 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3896 PetscFunctionReturn(0); 3897 } 3898 3899 /*@ 3900 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3901 (the default parallel PETSc format). 3902 3903 Collective 3904 3905 Input Parameters: 3906 + B - the matrix 3907 . i - the indices into j for the start of each local row (starts with zero) 3908 . j - the column indices for each local row (starts with zero) 3909 - v - optional values in the matrix 3910 3911 Level: developer 3912 3913 Notes: 3914 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3915 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3916 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3917 3918 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3919 3920 The format which is used for the sparse matrix input, is equivalent to a 3921 row-major ordering.. i.e for the following matrix, the input data expected is 3922 as shown 3923 3924 $ 1 0 0 3925 $ 2 0 3 P0 3926 $ ------- 3927 $ 4 5 6 P1 3928 $ 3929 $ Process0 [P0]: rows_owned=[0,1] 3930 $ i = {0,1,3} [size = nrow+1 = 2+1] 3931 $ j = {0,0,2} [size = 3] 3932 $ v = {1,2,3} [size = 3] 3933 $ 3934 $ Process1 [P1]: rows_owned=[2] 3935 $ i = {0,3} [size = nrow+1 = 1+1] 3936 $ j = {0,1,2} [size = 3] 3937 $ v = {4,5,6} [size = 3] 3938 3939 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3940 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3941 @*/ 3942 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3943 { 3944 PetscErrorCode ierr; 3945 3946 PetscFunctionBegin; 3947 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3948 PetscFunctionReturn(0); 3949 } 3950 3951 /*@C 3952 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3953 (the default parallel PETSc format). For good matrix assembly performance 3954 the user should preallocate the matrix storage by setting the parameters 3955 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3956 performance can be increased by more than a factor of 50. 3957 3958 Collective 3959 3960 Input Parameters: 3961 + B - the matrix 3962 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3963 (same value is used for all local rows) 3964 . d_nnz - array containing the number of nonzeros in the various rows of the 3965 DIAGONAL portion of the local submatrix (possibly different for each row) 3966 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3967 The size of this array is equal to the number of local rows, i.e 'm'. 3968 For matrices that will be factored, you must leave room for (and set) 3969 the diagonal entry even if it is zero. 3970 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3971 submatrix (same value is used for all local rows). 3972 - o_nnz - array containing the number of nonzeros in the various rows of the 3973 OFF-DIAGONAL portion of the local submatrix (possibly different for 3974 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3975 structure. The size of this array is equal to the number 3976 of local rows, i.e 'm'. 3977 3978 If the *_nnz parameter is given then the *_nz parameter is ignored. 3979 3980 The AIJ format (also called the Yale sparse matrix format or 3981 compressed row storage (CSR)) is fully compatible with standard Fortran 77 3982 storage. The stored row and column indices begin with zero. 3983 See Users-Manual: ch_mat for details. 3984 3985 The parallel matrix is partitioned such that the first m0 rows belong to 3986 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3987 to process 2, etc., where m0,m1,m2,... are the input parameter 'm'. 3988 3989 The DIAGONAL portion of the local submatrix of a processor can be defined 3990 as the submatrix obtained by extracting the part corresponding to 3991 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3992 first row that belongs to the processor, r2 is the last row belonging to 3993 this processor, and c1-c2 is the range of indices of the local part of a 3994 vector suitable for applying the matrix to. This is an mxn matrix. In the 3995 common case of a square matrix, the row and column ranges are the same and 3996 the DIAGONAL part is also square. The remaining portion of the local 3997 submatrix (mxN) constitutes the OFF-DIAGONAL portion. 3998 3999 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 4000 4001 You can call MatGetInfo() to get information on how effective the preallocation was; 4002 for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 4003 You can also run with the option -info and look for messages with the string 4004 malloc in them to see if additional memory allocation was needed. 4005 4006 Example usage: 4007 4008 Consider the following 8x8 matrix with 34 non-zero values, that is 4009 assembled across 3 processors. Let us assume that proc0 owns 3 rows, 4010 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 4011 as follows: 4012 4013 .vb 4014 1 2 0 | 0 3 0 | 0 4 4015 Proc0 0 5 6 | 7 0 0 | 8 0 4016 9 0 10 | 11 0 0 | 12 0 4017 ------------------------------------- 4018 13 0 14 | 15 16 17 | 0 0 4019 Proc1 0 18 0 | 19 20 21 | 0 0 4020 0 0 0 | 22 23 0 | 24 0 4021 ------------------------------------- 4022 Proc2 25 26 27 | 0 0 28 | 29 0 4023 30 0 0 | 31 32 33 | 0 34 4024 .ve 4025 4026 This can be represented as a collection of submatrices as: 4027 4028 .vb 4029 A B C 4030 D E F 4031 G H I 4032 .ve 4033 4034 where the submatrices A,B,C are owned by proc0, D,E,F are 4035 owned by proc1, and G,H,I are owned by proc2. 4036 4037 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4038 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4039 The 'M','N' parameters are 8,8, and have the same values on all procs. 4040 4041 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4042 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4043 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4044 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 4045 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ 4046 matrix and [DF] as another SeqAIJ matrix. 4047 4048 When the d_nz, o_nz parameters are specified, d_nz storage elements are 4049 allocated for every row of the local diagonal submatrix, and o_nz 4050 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4051 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 4052 row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively. 4053 In this case, the values of d_nz,o_nz are: 4054 .vb 4055 proc0 : d_nz = 2, o_nz = 2 4056 proc1 : d_nz = 3, o_nz = 2 4057 proc2 : d_nz = 1, o_nz = 4 4058 .ve 4059 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4060 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 4061 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4062 34 values. 4063 4064 When the d_nnz, o_nnz parameters are specified, the storage is specified 4065 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 4066 In the above case the values for d_nnz,o_nnz are: 4067 .vb 4068 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4069 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4070 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4071 .ve 4072 Here the space allocated is the sum of all the above values, i.e. 34, and 4073 hence the preallocation is perfect. 4074 4075 Level: intermediate 4076 4077 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4078 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 4079 @*/ 4080 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4081 { 4082 PetscErrorCode ierr; 4083 4084 PetscFunctionBegin; 4085 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4086 PetscValidType(B,1); 4087 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4088 PetscFunctionReturn(0); 4089 } 4090 4091 /*@ 4092 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard 4093 CSR format. 4094 4095 Collective 4096 4097 Input Parameters: 4098 + comm - MPI communicator 4099 . m - number of local rows (Cannot be PETSC_DECIDE) 4100 . n - This value should be the same as the local size used in creating the 4101 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4102 calculated if N is given) For square matrices n is almost always m. 4103 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4104 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4105 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4106 . j - column indices 4107 - a - matrix values 4108 4109 Output Parameter: 4110 . mat - the matrix 4111 4112 Level: intermediate 4113 4114 Notes: 4115 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4116 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4117 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4118 4119 The i and j indices are 0 based, and the i indices are indices corresponding to the local j array. 4120 4121 The format used for the sparse matrix input is equivalent to a 4122 row-major ordering,
i.e for the following matrix, the input data expected is 4123 as shown 4124 4125 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays 4126 4127 $ 1 0 0 4128 $ 2 0 3 P0 4129 $ ------- 4130 $ 4 5 6 P1 4131 $ 4132 $ Process0 [P0]: rows_owned=[0,1] 4133 $ i = {0,1,3} [size = nrow+1 = 2+1] 4134 $ j = {0,0,2} [size = 3] 4135 $ v = {1,2,3} [size = 3] 4136 $ 4137 $ Process1 [P1]: rows_owned=[2] 4138 $ i = {0,3} [size = nrow+1 = 1+1] 4139 $ j = {0,1,2} [size = 3] 4140 $ v = {4,5,6} [size = 3] 4141 4142 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4143 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4144 @*/ 4145 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4146 { 4147 PetscErrorCode ierr; 4148 4149 PetscFunctionBegin; 4150 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4151 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4152 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4153 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4154 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4155 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4156 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4157 PetscFunctionReturn(0); 4158 } 4159 4160 /*@ 4161 MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard 4162 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical 4163 4164 Collective 4165 4166 Input Parameters: 4167 + mat - the matrix 4168 . m - number of local rows (Cannot be PETSC_DECIDE) 4169 . n - This value should be the same as the local size used in creating the 4170 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4171 calculated if N is given) For square matrices n is almost always m. 4172 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4173 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4174 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4175 . 
J - column indices 4176 - v - matrix values 4177 4178 Level: intermediate 4179 4180 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4181 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4182 @*/ 4183 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4184 { 4185 PetscErrorCode ierr; 4186 PetscInt cstart,nnz,i,j; 4187 PetscInt *ld; 4188 PetscBool nooffprocentries; 4189 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4190 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data; 4191 PetscScalar *ad = Ad->a, *ao = Ao->a; 4192 const PetscInt *Adi = Ad->i; 4193 PetscInt ldi,Iii,md; 4194 4195 PetscFunctionBegin; 4196 if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4197 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4198 if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4199 if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4200 4201 cstart = mat->cmap->rstart; 4202 if (!Aij->ld) { 4203 /* count number of entries below block diagonal */ 4204 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 4205 Aij->ld = ld; 4206 for (i=0; i<m; i++) { 4207 nnz = Ii[i+1]- Ii[i]; 4208 j = 0; 4209 while (J[j] < cstart && j < nnz) {j++;} 4210 J += nnz; 4211 ld[i] = j; 4212 } 4213 } else { 4214 ld = Aij->ld; 4215 } 4216 4217 for (i=0; i<m; i++) { 4218 nnz = Ii[i+1]- Ii[i]; 4219 Iii = Ii[i]; 4220 ldi = ld[i]; 4221 md = Adi[i+1]-Adi[i]; 4222 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr); 4223 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr); 4224 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr); 4225 ad += md; 4226 ao += nnz - md; 4227 } 4228 nooffprocentries = mat->nooffprocentries; 4229 mat->nooffprocentries = PETSC_TRUE; 4230 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr); 4231 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr); 4232 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 4233 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4234 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4235 mat->nooffprocentries = nooffprocentries; 4236 PetscFunctionReturn(0); 4237 } 4238 4239 /*@C 4240 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4241 (the default parallel PETSc format). For good matrix assembly performance 4242 the user should preallocate the matrix storage by setting the parameters 4243 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4244 performance can be increased by more than a factor of 50. 4245 4246 Collective 4247 4248 Input Parameters: 4249 + comm - MPI communicator 4250 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4251 This value should be the same as the local size used in creating the 4252 y vector for the matrix-vector product y = Ax. 4253 . n - This value should be the same as the local size used in creating the 4254 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4255 calculated if N is given) For square matrices n is almost always m. 4256 . 
M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4257 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4258 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4259 (same value is used for all local rows) 4260 . d_nnz - array containing the number of nonzeros in the various rows of the 4261 DIAGONAL portion of the local submatrix (possibly different for each row) 4262 or NULL, if d_nz is used to specify the nonzero structure. 4263 The size of this array is equal to the number of local rows, i.e 'm'. 4264 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4265 submatrix (same value is used for all local rows). 4266 - o_nnz - array containing the number of nonzeros in the various rows of the 4267 OFF-DIAGONAL portion of the local submatrix (possibly different for 4268 each row) or NULL, if o_nz is used to specify the nonzero 4269 structure. The size of this array is equal to the number 4270 of local rows, i.e 'm'. 4271 4272 Output Parameter: 4273 . A - the matrix 4274 4275 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4276 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4277 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4278 4279 Notes: 4280 If the *_nnz parameter is given then the *_nz parameter is ignored 4281 4282 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4283 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4284 storage requirements for this matrix. 4285 4286 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4287 processor than it must be used on all processors that share the object for 4288 that argument. 4289 4290 The user MUST specify either the local or global matrix dimensions 4291 (possibly both). 4292 4293 The parallel matrix is partitioned across processors such that the 4294 first m0 rows belong to process 0, the next m1 rows belong to 4295 process 1, the next m2 rows belong to process 2 etc.. where 4296 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4297 values corresponding to [m x N] submatrix. 4298 4299 The columns are logically partitioned with the n0 columns belonging 4300 to 0th partition, the next n1 columns belonging to the next 4301 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4302 4303 The DIAGONAL portion of the local submatrix on any given processor 4304 is the submatrix corresponding to the rows and columns m,n 4305 corresponding to the given processor. i.e diagonal matrix on 4306 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4307 etc. The remaining portion of the local submatrix [m x (N-n)] 4308 constitute the OFF-DIAGONAL portion. The example below better 4309 illustrates this concept. 4310 4311 For a square global matrix we define each processor's diagonal portion 4312 to be its local rows and the corresponding columns (a square submatrix); 4313 each processor's off-diagonal portion encompasses the remainder of the 4314 local matrix (a rectangular submatrix). 4315 4316 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4317 4318 When calling this routine with a single process communicator, a matrix of 4319 type SEQAIJ is returned. 
If a matrix of type MPIAIJ is desired for this 4320 type of communicator, use the construction mechanism 4321 .vb 4322 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4323 .ve 4324 4325 $ MatCreate(...,&A); 4326 $ MatSetType(A,MATMPIAIJ); 4327 $ MatSetSizes(A, m,n,M,N); 4328 $ MatMPIAIJSetPreallocation(A,...); 4329 4330 By default, this format uses inodes (identical nodes) when possible. 4331 We search for consecutive rows with the same nonzero structure, thereby 4332 reusing matrix information to achieve increased efficiency. 4333 4334 Options Database Keys: 4335 + -mat_no_inode - Do not use inodes 4336 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4337 4338 Example usage: 4339 4340 Consider the following 8x8 matrix with 34 non-zero values, that is 4341 assembled across 3 processors. Let us assume that proc0 owns 3 rows, 4342 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 4343 as follows: 4344 4345 .vb 4346 1 2 0 | 0 3 0 | 0 4 4347 Proc0 0 5 6 | 7 0 0 | 8 0 4348 9 0 10 | 11 0 0 | 12 0 4349 ------------------------------------- 4350 13 0 14 | 15 16 17 | 0 0 4351 Proc1 0 18 0 | 19 20 21 | 0 0 4352 0 0 0 | 22 23 0 | 24 0 4353 ------------------------------------- 4354 Proc2 25 26 27 | 0 0 28 | 29 0 4355 30 0 0 | 31 32 33 | 0 34 4356 .ve 4357 4358 This can be represented as a collection of submatrices as 4359 4360 .vb 4361 A B C 4362 D E F 4363 G H I 4364 .ve 4365 4366 where the submatrices A,B,C are owned by proc0, D,E,F are 4367 owned by proc1, and G,H,I are owned by proc2. 4368 4369 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4370 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4371 The 'M','N' parameters are 8,8, and have the same values on all procs. 4372 4373 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4374 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4375 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4376 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 4377 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ 4378 matrix, and [DF] as another SeqAIJ matrix. 4379 4380 When the d_nz, o_nz parameters are specified, d_nz storage elements are 4381 allocated for every row of the local diagonal submatrix, and o_nz 4382 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4383 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 4384 local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4385 In this case, the values of d_nz,o_nz are 4386 .vb 4387 proc0 : d_nz = 2, o_nz = 2 4388 proc1 : d_nz = 3, o_nz = 2 4389 proc2 : d_nz = 1, o_nz = 4 4390 .ve 4391 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4392 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 4393 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4394 34 values. 4395 4396 When the d_nnz, o_nnz parameters are specified, the storage is specified 4397 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4398 In the above case the values for d_nnz,o_nnz are 4399 .vb 4400 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4401 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4402 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4403 .ve 4404 Here the space allocated is the sum of all the above values, i.e., 34, and 4405 hence the preallocation is perfect.
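   As an illustration only (not part of the formal interface), the 8x8 example above could be
   preallocated and created on each rank roughly as follows, where 'rank' is assumed to have
   been obtained earlier with MPI_Comm_rank() and error checking is omitted:
.vb
     Mat            A;
     PetscInt       m     = (rank == 2) ? 2 : 3;              /* 3,3,2 local rows and columns     */
     PetscInt       d0[3] = {2,2,2}, o0[3] = {2,2,2};         /* per-row counts on rank 0         */
     PetscInt       d1[3] = {3,3,2}, o1[3] = {2,1,1};         /* per-row counts on rank 1         */
     PetscInt       d2[2] = {1,1},   o2[2] = {4,4};           /* per-row counts on rank 2         */
     const PetscInt *dnnz = (rank == 0) ? d0 : (rank == 1) ? d1 : d2;
     const PetscInt *onnz = (rank == 0) ? o0 : (rank == 1) ? o1 : o2;
     MatCreateAIJ(PETSC_COMM_WORLD,m,m,8,8,0,dnnz,0,onnz,&A); /* *_nz ignored since *_nnz given   */
     /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve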
4406 4407 Level: intermediate 4408 4409 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4410 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4411 @*/ 4412 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4413 { 4414 PetscErrorCode ierr; 4415 PetscMPIInt size; 4416 4417 PetscFunctionBegin; 4418 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4419 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4420 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4421 if (size > 1) { 4422 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4423 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4424 } else { 4425 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4426 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4427 } 4428 PetscFunctionReturn(0); 4429 } 4430 4431 /*@C 4432 MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix 4433 4434 Not collective 4435 4436 Input Parameter: 4437 . A - The MPIAIJ matrix 4438 4439 Output Parameters: 4440 + Ad - The local diagonal block as a SeqAIJ matrix 4441 . Ao - The local off-diagonal block as a SeqAIJ matrix 4442 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4443 4444 Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4445 in Ad are in [0, Nc), where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is 4446 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4447 local column numbers to global column numbers in the original matrix.
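   For illustration, a minimal sketch of typical usage, assuming A is an assembled MATMPIAIJ matrix
   (error checking omitted):
.vb
     Mat            Ad,Ao;
     const PetscInt *colmap;
     PetscInt       nco,j;
     MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
     MatGetLocalSize(Ao,NULL,&nco);      /* number of nonzero off-diagonal columns on this process */
     for (j=0; j<nco; j++) {
       PetscPrintf(PETSC_COMM_SELF,"local off-diagonal column %D is global column %D\n",j,colmap[j]);
     }
.ve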
4448 4449 Level: intermediate 4450 4451 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ 4452 @*/ 4453 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4454 { 4455 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4456 PetscBool flg; 4457 PetscErrorCode ierr; 4458 4459 PetscFunctionBegin; 4460 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4461 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4462 if (Ad) *Ad = a->A; 4463 if (Ao) *Ao = a->B; 4464 if (colmap) *colmap = a->garray; 4465 PetscFunctionReturn(0); 4466 } 4467 4468 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4469 { 4470 PetscErrorCode ierr; 4471 PetscInt m,N,i,rstart,nnz,Ii; 4472 PetscInt *indx; 4473 PetscScalar *values; 4474 4475 PetscFunctionBegin; 4476 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4477 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4478 PetscInt *dnz,*onz,sum,bs,cbs; 4479 4480 if (n == PETSC_DECIDE) { 4481 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4482 } 4483 /* Check sum(n) = N */ 4484 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 4485 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4486 4487 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 4488 rstart -= m; 4489 4490 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4491 for (i=0; i<m; i++) { 4492 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4493 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4494 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4495 } 4496 4497 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4498 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4499 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4500 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4501 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4502 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4503 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4504 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4505 ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 4506 } 4507 4508 /* numeric phase */ 4509 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4510 for (i=0; i<m; i++) { 4511 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4512 Ii = i + rstart; 4513 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4514 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4515 } 4516 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4517 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4518 PetscFunctionReturn(0); 4519 } 4520 4521 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4522 { 4523 PetscErrorCode ierr; 4524 PetscMPIInt rank; 4525 PetscInt m,N,i,rstart,nnz; 4526 size_t len; 4527 const PetscInt *indx; 4528 PetscViewer out; 4529 char *name; 4530 Mat B; 4531 const PetscScalar *values; 4532 4533 PetscFunctionBegin; 4534 ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr); 4535 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 4536 /* Should this be the type of the diagonal block of A? 
*/ 4537 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4538 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4539 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4540 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4541 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4542 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 4543 for (i=0; i<m; i++) { 4544 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4545 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4546 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4547 } 4548 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4549 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4550 4551 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr); 4552 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4553 ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr); 4554 ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr); 4555 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4556 ierr = PetscFree(name);CHKERRQ(ierr); 4557 ierr = MatView(B,out);CHKERRQ(ierr); 4558 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4559 ierr = MatDestroy(&B);CHKERRQ(ierr); 4560 PetscFunctionReturn(0); 4561 } 4562 4563 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4564 { 4565 PetscErrorCode ierr; 4566 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4567 4568 PetscFunctionBegin; 4569 if (!merge) PetscFunctionReturn(0); 4570 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4571 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4572 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4573 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4574 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4575 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4576 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4577 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4578 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4579 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4580 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4581 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4582 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4583 ierr = PetscFree(merge);CHKERRQ(ierr); 4584 PetscFunctionReturn(0); 4585 } 4586 4587 #include <../src/mat/utils/freespace.h> 4588 #include <petscbt.h> 4589 4590 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4591 { 4592 PetscErrorCode ierr; 4593 MPI_Comm comm; 4594 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4595 PetscMPIInt size,rank,taga,*len_s; 4596 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4597 PetscInt proc,m; 4598 PetscInt **buf_ri,**buf_rj; 4599 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4600 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4601 MPI_Request *s_waits,*r_waits; 4602 MPI_Status *status; 4603 MatScalar *aa=a->a; 4604 MatScalar **abuf_r,*ba_i; 4605 Mat_Merge_SeqsToMPI *merge; 4606 PetscContainer container; 4607 4608 PetscFunctionBegin; 4609 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4610 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4611 4612 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4613 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 4614 4615 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4616 if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4617 ierr = 
PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4618 4619 bi = merge->bi; 4620 bj = merge->bj; 4621 buf_ri = merge->buf_ri; 4622 buf_rj = merge->buf_rj; 4623 4624 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4625 owners = merge->rowmap->range; 4626 len_s = merge->len_s; 4627 4628 /* send and recv matrix values */ 4629 /*-----------------------------*/ 4630 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4631 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4632 4633 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4634 for (proc=0,k=0; proc<size; proc++) { 4635 if (!len_s[proc]) continue; 4636 i = owners[proc]; 4637 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRMPI(ierr); 4638 k++; 4639 } 4640 4641 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRMPI(ierr);} 4642 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRMPI(ierr);} 4643 ierr = PetscFree(status);CHKERRQ(ierr); 4644 4645 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4646 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4647 4648 /* insert mat values of mpimat */ 4649 /*----------------------------*/ 4650 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4651 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4652 4653 for (k=0; k<merge->nrecv; k++) { 4654 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4655 nrows = *(buf_ri_k[k]); 4656 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4657 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4658 } 4659 4660 /* set values of ba */ 4661 m = merge->rowmap->n; 4662 for (i=0; i<m; i++) { 4663 arow = owners[rank] + i; 4664 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4665 bnzi = bi[i+1] - bi[i]; 4666 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4667 4668 /* add local non-zero vals of this proc's seqmat into ba */ 4669 anzi = ai[arow+1] - ai[arow]; 4670 aj = a->j + ai[arow]; 4671 aa = a->a + ai[arow]; 4672 nextaj = 0; 4673 for (j=0; nextaj<anzi; j++) { 4674 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4675 ba_i[j] += aa[nextaj++]; 4676 } 4677 } 4678 4679 /* add received vals into ba */ 4680 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4681 /* i-th row */ 4682 if (i == *nextrow[k]) { 4683 anzi = *(nextai[k]+1) - *nextai[k]; 4684 aj = buf_rj[k] + *(nextai[k]); 4685 aa = abuf_r[k] + *(nextai[k]); 4686 nextaj = 0; 4687 for (j=0; nextaj<anzi; j++) { 4688 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4689 ba_i[j] += aa[nextaj++]; 4690 } 4691 } 4692 nextrow[k]++; nextai[k]++; 4693 } 4694 } 4695 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4696 } 4697 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4698 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4699 4700 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4701 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4702 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4703 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4704 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4705 PetscFunctionReturn(0); 4706 } 4707 4708 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4709 { 4710 PetscErrorCode ierr; 4711 Mat B_mpi; 4712 Mat_SeqAIJ 
*a=(Mat_SeqAIJ*)seqmat->data; 4713 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4714 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4715 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4716 PetscInt len,proc,*dnz,*onz,bs,cbs; 4717 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4718 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4719 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4720 MPI_Status *status; 4721 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4722 PetscBT lnkbt; 4723 Mat_Merge_SeqsToMPI *merge; 4724 PetscContainer container; 4725 4726 PetscFunctionBegin; 4727 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4728 4729 /* make sure it is a PETSc comm */ 4730 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4731 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4732 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 4733 4734 ierr = PetscNew(&merge);CHKERRQ(ierr); 4735 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4736 4737 /* determine row ownership */ 4738 /*---------------------------------------------------------*/ 4739 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4740 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4741 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4742 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4743 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4744 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4745 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4746 4747 m = merge->rowmap->n; 4748 owners = merge->rowmap->range; 4749 4750 /* determine the number of messages to send, their lengths */ 4751 /*---------------------------------------------------------*/ 4752 len_s = merge->len_s; 4753 4754 len = 0; /* length of buf_si[] */ 4755 merge->nsend = 0; 4756 for (proc=0; proc<size; proc++) { 4757 len_si[proc] = 0; 4758 if (proc == rank) { 4759 len_s[proc] = 0; 4760 } else { 4761 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4762 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4763 } 4764 if (len_s[proc]) { 4765 merge->nsend++; 4766 nrows = 0; 4767 for (i=owners[proc]; i<owners[proc+1]; i++) { 4768 if (ai[i+1] > ai[i]) nrows++; 4769 } 4770 len_si[proc] = 2*(nrows+1); 4771 len += len_si[proc]; 4772 } 4773 } 4774 4775 /* determine the number and length of messages to receive for ij-structure */ 4776 /*-------------------------------------------------------------------------*/ 4777 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4778 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4779 4780 /* post the Irecv of j-structure */ 4781 /*-------------------------------*/ 4782 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4783 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4784 4785 /* post the Isend of j-structure */ 4786 /*--------------------------------*/ 4787 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4788 4789 for (proc=0, k=0; proc<size; proc++) { 4790 if (!len_s[proc]) continue; 4791 i = owners[proc]; 4792 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRMPI(ierr); 4793 k++; 4794 } 4795 4796 /* receives and sends of j-structure are complete */ 4797 
/*------------------------------------------------*/ 4798 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRMPI(ierr);} 4799 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRMPI(ierr);} 4800 4801 /* send and recv i-structure */ 4802 /*---------------------------*/ 4803 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4804 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4805 4806 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4807 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4808 for (proc=0,k=0; proc<size; proc++) { 4809 if (!len_s[proc]) continue; 4810 /* form outgoing message for i-structure: 4811 buf_si[0]: nrows to be sent 4812 [1:nrows]: row index (global) 4813 [nrows+1:2*nrows+1]: i-structure index 4814 */ 4815 /*-------------------------------------------*/ 4816 nrows = len_si[proc]/2 - 1; 4817 buf_si_i = buf_si + nrows+1; 4818 buf_si[0] = nrows; 4819 buf_si_i[0] = 0; 4820 nrows = 0; 4821 for (i=owners[proc]; i<owners[proc+1]; i++) { 4822 anzi = ai[i+1] - ai[i]; 4823 if (anzi) { 4824 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4825 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4826 nrows++; 4827 } 4828 } 4829 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRMPI(ierr); 4830 k++; 4831 buf_si += len_si[proc]; 4832 } 4833 4834 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRMPI(ierr);} 4835 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRMPI(ierr);} 4836 4837 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4838 for (i=0; i<merge->nrecv; i++) { 4839 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4840 } 4841 4842 ierr = PetscFree(len_si);CHKERRQ(ierr); 4843 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4844 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4845 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4846 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4847 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4848 ierr = PetscFree(status);CHKERRQ(ierr); 4849 4850 /* compute a local seq matrix in each processor */ 4851 /*----------------------------------------------*/ 4852 /* allocate bi array and free space for accumulating nonzero column info */ 4853 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4854 bi[0] = 0; 4855 4856 /* create and initialize a linked list */ 4857 nlnk = N+1; 4858 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4859 4860 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4861 len = ai[owners[rank+1]] - ai[owners[rank]]; 4862 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4863 4864 current_space = free_space; 4865 4866 /* determine symbolic info for each local row */ 4867 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4868 4869 for (k=0; k<merge->nrecv; k++) { 4870 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4871 nrows = *buf_ri_k[k]; 4872 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4873 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4874 } 4875 4876 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4877 len = 0; 4878 for (i=0; i<m; i++) { 4879 bnzi = 0; 4880 /* add local non-zero cols of this proc's 
seqmat into lnk */ 4881 arow = owners[rank] + i; 4882 anzi = ai[arow+1] - ai[arow]; 4883 aj = a->j + ai[arow]; 4884 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4885 bnzi += nlnk; 4886 /* add received col data into lnk */ 4887 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4888 if (i == *nextrow[k]) { /* i-th row */ 4889 anzi = *(nextai[k]+1) - *nextai[k]; 4890 aj = buf_rj[k] + *nextai[k]; 4891 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4892 bnzi += nlnk; 4893 nextrow[k]++; nextai[k]++; 4894 } 4895 } 4896 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4897 4898 /* if free space is not available, make more free space */ 4899 if (current_space->local_remaining<bnzi) { 4900 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr); 4901 nspacedouble++; 4902 } 4903 /* copy data into free space, then initialize lnk */ 4904 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4905 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4906 4907 current_space->array += bnzi; 4908 current_space->local_used += bnzi; 4909 current_space->local_remaining -= bnzi; 4910 4911 bi[i+1] = bi[i] + bnzi; 4912 } 4913 4914 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4915 4916 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4917 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4918 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4919 4920 /* create symbolic parallel matrix B_mpi */ 4921 /*---------------------------------------*/ 4922 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4923 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4924 if (n==PETSC_DECIDE) { 4925 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4926 } else { 4927 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4928 } 4929 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4930 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4931 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4932 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4933 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4934 4935 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4936 B_mpi->assembled = PETSC_FALSE; 4937 merge->bi = bi; 4938 merge->bj = bj; 4939 merge->buf_ri = buf_ri; 4940 merge->buf_rj = buf_rj; 4941 merge->coi = NULL; 4942 merge->coj = NULL; 4943 merge->owners_co = NULL; 4944 4945 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4946 4947 /* attach the supporting struct to B_mpi for reuse */ 4948 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4949 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4950 ierr = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr); 4951 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4952 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4953 *mpimat = B_mpi; 4954 4955 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4956 PetscFunctionReturn(0); 4957 } 4958 4959 /*@C 4960 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4961 matrices from each processor 4962 4963 Collective 4964 4965 Input Parameters: 4966 + comm - the communicator the parallel matrix will live on 4967 . seqmat - the input sequential matrices 4968 .
m - number of local rows (or PETSC_DECIDE) 4969 . n - number of local columns (or PETSC_DECIDE) 4970 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4971 4972 Output Parameter: 4973 . mpimat - the parallel matrix generated 4974 4975 Level: advanced 4976 4977 Notes: 4978 The dimensions of the sequential matrix in each processor MUST be the same. 4979 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 4980 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4981 @*/ 4982 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4983 { 4984 PetscErrorCode ierr; 4985 PetscMPIInt size; 4986 4987 PetscFunctionBegin; 4988 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4989 if (size == 1) { 4990 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4991 if (scall == MAT_INITIAL_MATRIX) { 4992 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4993 } else { 4994 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4995 } 4996 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4997 PetscFunctionReturn(0); 4998 } 4999 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5000 if (scall == MAT_INITIAL_MATRIX) { 5001 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5002 } 5003 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5004 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5005 PetscFunctionReturn(0); 5006 } 5007 5008 /*@ 5009 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5010 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5011 with MatGetSize(). 5012 5013 Not Collective 5014 5015 Input Parameters: 5016 + A - the matrix 5017 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5018 5019 Output Parameter: 5020 . A_loc - the local sequential matrix generated 5021 5022 Level: developer 5023 5024 Notes: 5025 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 5026 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 5027 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 5028 modify the values of the returned A_loc.
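   For illustration, a typical calling sequence might be (assuming A is an assembled MATMPIAIJ matrix; error checking omitted):
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);   /* build the local matrix once            */
     /* ... the values of A change, its nonzero pattern does not ... */
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);     /* refresh the values of A_loc in place   */
     MatDestroy(&A_loc);
.ve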
5029 5030 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge() 5031 @*/ 5032 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5033 { 5034 PetscErrorCode ierr; 5035 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5036 Mat_SeqAIJ *mat,*a,*b; 5037 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5038 const PetscScalar *aa,*ba,*aav,*bav; 5039 PetscScalar *ca,*cam; 5040 PetscMPIInt size; 5041 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5042 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5043 PetscBool match; 5044 5045 PetscFunctionBegin; 5046 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5047 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5048 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 5049 if (size == 1) { 5050 if (scall == MAT_INITIAL_MATRIX) { 5051 ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr); 5052 *A_loc = mpimat->A; 5053 } else if (scall == MAT_REUSE_MATRIX) { 5054 ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5055 } 5056 PetscFunctionReturn(0); 5057 } 5058 5059 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5060 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5061 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5062 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5063 ierr = MatSeqAIJGetArrayRead(mpimat->A,&aav);CHKERRQ(ierr); 5064 ierr = MatSeqAIJGetArrayRead(mpimat->B,&bav);CHKERRQ(ierr); 5065 aa = aav; 5066 ba = bav; 5067 if (scall == MAT_INITIAL_MATRIX) { 5068 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5069 ci[0] = 0; 5070 for (i=0; i<am; i++) { 5071 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5072 } 5073 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5074 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5075 k = 0; 5076 for (i=0; i<am; i++) { 5077 ncols_o = bi[i+1] - bi[i]; 5078 ncols_d = ai[i+1] - ai[i]; 5079 /* off-diagonal portion of A */ 5080 for (jo=0; jo<ncols_o; jo++) { 5081 col = cmap[*bj]; 5082 if (col >= cstart) break; 5083 cj[k] = col; bj++; 5084 ca[k++] = *ba++; 5085 } 5086 /* diagonal portion of A */ 5087 for (j=0; j<ncols_d; j++) { 5088 cj[k] = cstart + *aj++; 5089 ca[k++] = *aa++; 5090 } 5091 /* off-diagonal portion of A */ 5092 for (j=jo; j<ncols_o; j++) { 5093 cj[k] = cmap[*bj++]; 5094 ca[k++] = *ba++; 5095 } 5096 } 5097 /* put together the new matrix */ 5098 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5099 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5100 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5101 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5102 mat->free_a = PETSC_TRUE; 5103 mat->free_ij = PETSC_TRUE; 5104 mat->nonew = 0; 5105 } else if (scall == MAT_REUSE_MATRIX) { 5106 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5107 #if defined(PETSC_USE_DEVICE) 5108 (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU; 5109 #endif 5110 ci = mat->i; cj = mat->j; cam = mat->a; 5111 for (i=0; i<am; i++) { 5112 /* off-diagonal portion of A */ 5113 ncols_o = bi[i+1] - bi[i]; 5114 for (jo=0; jo<ncols_o; jo++) { 5115 col = cmap[*bj]; 5116 if (col >= cstart) break; 5117 *cam++ = *ba++; bj++; 5118 } 5119 /* diagonal portion of A */ 5120 ncols_d = ai[i+1] - ai[i]; 5121 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5122 /* off-diagonal portion of A */ 5123 for (j=jo; j<ncols_o; j++) { 5124 *cam++ = *ba++; bj++; 5125 } 5126 } 5127 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5128 ierr = MatSeqAIJRestoreArrayRead(mpimat->A,&aav);CHKERRQ(ierr); 5129 ierr = MatSeqAIJRestoreArrayRead(mpimat->B,&bav);CHKERRQ(ierr); 5130 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5131 PetscFunctionReturn(0); 5132 } 5133 5134 /*@ 5135 MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5136 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part 5137 5138 Not Collective 5139 5140 Input Parameters: 5141 + A - the matrix 5142 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5143 5144 Output Parameter: 5145 + glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5146 - A_loc - the local sequential matrix generated 5147 5148 Level: developer 5149 5150 Notes: 5151 This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering) 5152 5153 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed() 5154 5155 @*/ 5156 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc) 5157 { 5158 PetscErrorCode ierr; 5159 Mat Ao,Ad; 5160 const PetscInt *cmap; 5161 PetscMPIInt size; 5162 PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*); 5163 5164 PetscFunctionBegin; 5165 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap);CHKERRQ(ierr); 5166 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 5167 if (size == 1) { 5168 if (scall == MAT_INITIAL_MATRIX) { 5169 ierr = PetscObjectReference((PetscObject)Ad);CHKERRQ(ierr); 5170 *A_loc = Ad; 5171 } else if (scall == MAT_REUSE_MATRIX) { 5172 ierr = MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5173 } 5174 if (glob) { ierr = ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob);CHKERRQ(ierr); } 5175 PetscFunctionReturn(0); 5176 } 5177 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f);CHKERRQ(ierr); 5178 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5179 if (f) { 5180 ierr = (*f)(A,scall,glob,A_loc);CHKERRQ(ierr); 5181 } else { 5182 Mat_SeqAIJ *a = (Mat_SeqAIJ*)Ad->data; 5183 Mat_SeqAIJ *b = (Mat_SeqAIJ*)Ao->data; 5184 Mat_SeqAIJ *c; 5185 PetscInt *ai = a->i, *aj = a->j; 5186 PetscInt *bi = b->i, *bj = b->j; 5187 PetscInt *ci,*cj; 5188 const PetscScalar *aa,*ba; 5189 PetscScalar *ca; 5190 PetscInt i,j,am,dn,on; 5191 5192 ierr = 
MatGetLocalSize(Ad,&am,&dn);CHKERRQ(ierr); 5193 ierr = MatGetLocalSize(Ao,NULL,&on);CHKERRQ(ierr); 5194 ierr = MatSeqAIJGetArrayRead(Ad,&aa);CHKERRQ(ierr); 5195 ierr = MatSeqAIJGetArrayRead(Ao,&ba);CHKERRQ(ierr); 5196 if (scall == MAT_INITIAL_MATRIX) { 5197 PetscInt k; 5198 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5199 ierr = PetscMalloc1(ai[am]+bi[am],&cj);CHKERRQ(ierr); 5200 ierr = PetscMalloc1(ai[am]+bi[am],&ca);CHKERRQ(ierr); 5201 ci[0] = 0; 5202 for (i=0,k=0; i<am; i++) { 5203 const PetscInt ncols_o = bi[i+1] - bi[i]; 5204 const PetscInt ncols_d = ai[i+1] - ai[i]; 5205 ci[i+1] = ci[i] + ncols_o + ncols_d; 5206 /* diagonal portion of A */ 5207 for (j=0; j<ncols_d; j++,k++) { 5208 cj[k] = *aj++; 5209 ca[k] = *aa++; 5210 } 5211 /* off-diagonal portion of A */ 5212 for (j=0; j<ncols_o; j++,k++) { 5213 cj[k] = dn + *bj++; 5214 ca[k] = *ba++; 5215 } 5216 } 5217 /* put together the new matrix */ 5218 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc);CHKERRQ(ierr); 5219 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5220 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5221 c = (Mat_SeqAIJ*)(*A_loc)->data; 5222 c->free_a = PETSC_TRUE; 5223 c->free_ij = PETSC_TRUE; 5224 c->nonew = 0; 5225 ierr = MatSetType(*A_loc,((PetscObject)Ad)->type_name);CHKERRQ(ierr); 5226 } else if (scall == MAT_REUSE_MATRIX) { 5227 #if defined(PETSC_HAVE_DEVICE) 5228 (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU; 5229 #endif 5230 c = (Mat_SeqAIJ*)(*A_loc)->data; 5231 ca = c->a; 5232 for (i=0; i<am; i++) { 5233 const PetscInt ncols_d = ai[i+1] - ai[i]; 5234 const PetscInt ncols_o = bi[i+1] - bi[i]; 5235 /* diagonal portion of A */ 5236 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 5237 /* off-diagonal portion of A */ 5238 for (j=0; j<ncols_o; j++) *ca++ = *ba++; 5239 } 5240 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5241 ierr = MatSeqAIJRestoreArrayRead(Ad,&aa);CHKERRQ(ierr); 5242 ierr = MatSeqAIJRestoreArrayRead(Ao,&aa);CHKERRQ(ierr); 5243 if (glob) { 5244 PetscInt cst, *gidx; 5245 5246 ierr = MatGetOwnershipRangeColumn(A,&cst,NULL);CHKERRQ(ierr); 5247 ierr = PetscMalloc1(dn+on,&gidx);CHKERRQ(ierr); 5248 for (i=0; i<dn; i++) gidx[i] = cst + i; 5249 for (i=0; i<on; i++) gidx[i+dn] = cmap[i]; 5250 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob);CHKERRQ(ierr); 5251 } 5252 } 5253 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5254 PetscFunctionReturn(0); 5255 } 5256 5257 /*@C 5258 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5259 5260 Not Collective 5261 5262 Input Parameters: 5263 + A - the matrix 5264 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5265 - row, col - index sets of rows and columns to extract (or NULL) 5266 5267 Output Parameter: 5268 . 
A_loc - the local sequential matrix generated 5269 5270 Level: developer 5271 5272 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5273 5274 @*/ 5275 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5276 { 5277 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5278 PetscErrorCode ierr; 5279 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5280 IS isrowa,iscola; 5281 Mat *aloc; 5282 PetscBool match; 5283 5284 PetscFunctionBegin; 5285 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5286 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5287 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5288 if (!row) { 5289 start = A->rmap->rstart; end = A->rmap->rend; 5290 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5291 } else { 5292 isrowa = *row; 5293 } 5294 if (!col) { 5295 start = A->cmap->rstart; 5296 cmap = a->garray; 5297 nzA = a->A->cmap->n; 5298 nzB = a->B->cmap->n; 5299 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5300 ncols = 0; 5301 for (i=0; i<nzB; i++) { 5302 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5303 else break; 5304 } 5305 imark = i; 5306 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5307 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5308 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5309 } else { 5310 iscola = *col; 5311 } 5312 if (scall != MAT_INITIAL_MATRIX) { 5313 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5314 aloc[0] = *A_loc; 5315 } 5316 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5317 if (!col) { /* attach global id of condensed columns */ 5318 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5319 } 5320 *A_loc = aloc[0]; 5321 ierr = PetscFree(aloc);CHKERRQ(ierr); 5322 if (!row) { 5323 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5324 } 5325 if (!col) { 5326 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5327 } 5328 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5329 PetscFunctionReturn(0); 5330 } 5331 5332 /* 5333 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5334 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5335 * on a global size. 
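 * Roughly, the implementation below locates the owner of each requested row with PetscLayoutFindOwnerIndex(),
 * builds PetscSF graphs for the diagonal and off-diagonal blocks of P, and broadcasts row lengths,
 * column indices (temporarily converted to global numbering) and values from the owning processes
 * into the new sequential matrix.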
* */ 5337 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5338 { 5339 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5340 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5341 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5342 PetscMPIInt owner; 5343 PetscSFNode *iremote,*oiremote; 5344 const PetscInt *lrowindices; 5345 PetscErrorCode ierr; 5346 PetscSF sf,osf; 5347 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5348 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5349 MPI_Comm comm; 5350 ISLocalToGlobalMapping mapping; 5351 5352 PetscFunctionBegin; 5353 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr); 5354 /* plocalsize is the number of roots 5355 * nrows is the number of leaves 5356 * */ 5357 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr); 5358 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr); 5359 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr); 5360 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr); 5361 for (i=0;i<nrows;i++) { 5362 /* Find a remote index and an owner for a row 5363 * The row could be local or remote 5364 * */ 5365 owner = 0; 5366 lidx = 0; 5367 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr); 5368 iremote[i].index = lidx; 5369 iremote[i].rank = owner; 5370 } 5371 /* Create SF to communicate how many nonzero columns for each row */ 5372 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5373 /* SF will figure out the number of nonzero columns for each row, and their 5374 * offsets 5375 * */ 5376 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5377 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5378 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5379 5380 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); 5381 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); 5382 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); 5383 roffsets[0] = 0; 5384 roffsets[1] = 0; 5385 for (i=0;i<plocalsize;i++) { 5386 /* diag */ 5387 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5388 /* off diag */ 5389 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5390 /* compute offsets so that we know the relative location of each row */ 5391 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5392 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5393 } 5394 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr); 5395 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr); 5396 /* 'r' means root, and 'l' means leaf */ 5397 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr); 5398 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr); 5399 ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr); 5400 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr); 5401 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5402 ierr = PetscFree(roffsets);CHKERRQ(ierr); 5403 ierr = PetscFree(nrcols);CHKERRQ(ierr); 5404 dntotalcols = 0; 5405 ontotalcols = 0; 5406 ncol = 0; 5407 for (i=0;i<nrows;i++) { 5408 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5409 ncol = PetscMax(pnnz[i],ncol); 5410 /* diag */ 5411 dntotalcols += nlcols[i*2+0]; 5412 /* off diag */ 5413 ontotalcols += nlcols[i*2+1]; 5414 } 5415 /* We do not need to figure out the right number of columns 5416 * since all the calculations will be done by going through the raw data 5417 * */ 5418 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr); 5419 ierr =
MatSetUp(*P_oth);CHKERRQ(ierr); 5420 ierr = PetscFree(pnnz);CHKERRQ(ierr); 5421 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5422 /* diag */ 5423 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr); 5424 /* off diag */ 5425 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr); 5426 /* diag */ 5427 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr); 5428 /* off diag */ 5429 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr); 5430 dntotalcols = 0; 5431 ontotalcols = 0; 5432 ntotalcols = 0; 5433 for (i=0;i<nrows;i++) { 5434 owner = 0; 5435 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr); 5436 /* Set iremote for diag matrix */ 5437 for (j=0;j<nlcols[i*2+0];j++) { 5438 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5439 iremote[dntotalcols].rank = owner; 5440 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5441 ilocal[dntotalcols++] = ntotalcols++; 5442 } 5443 /* off diag */ 5444 for (j=0;j<nlcols[i*2+1];j++) { 5445 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5446 oiremote[ontotalcols].rank = owner; 5447 oilocal[ontotalcols++] = ntotalcols++; 5448 } 5449 } 5450 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr); 5451 ierr = PetscFree(loffsets);CHKERRQ(ierr); 5452 ierr = PetscFree(nlcols);CHKERRQ(ierr); 5453 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5454 /* P serves as roots and P_oth is leaves 5455 * Diag matrix 5456 * */ 5457 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5458 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5459 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5460 5461 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr); 5462 /* Off diag */ 5463 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5464 ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr); 5465 ierr = PetscSFSetUp(osf);CHKERRQ(ierr); 5466 /* We operate on the matrix internal data for saving memory */ 5467 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5468 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5469 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr); 5470 /* Convert to global indices for diag matrix */ 5471 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5472 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5473 /* We want P_oth store global indices */ 5474 ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr); 5475 /* Use memory scalable approach */ 5476 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr); 5477 ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); 5478 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5479 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5480 /* Convert back to local indices */ 5481 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5482 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5483 nout = 0; 5484 ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr); 5485 if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout); 5486 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 
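 /* At this point pd->j has been restored to local numbering and po->j has been mapped back from
    global to local numbering; only the value broadcasts started above remain outstanding */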
5487 /* Exchange values */ 5488 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5489 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5490 /* Stop PETSc from shrinking memory */ 5491 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5492 ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5493 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5494 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5495 ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5496 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5497 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5498 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5499 PetscFunctionReturn(0); 5500 } 5501 5502 /* 5503 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5504 * This supports MPIAIJ and MAIJ 5505 * */ 5506 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5507 { 5508 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5509 Mat_SeqAIJ *p_oth; 5510 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 5511 IS rows,map; 5512 PetscHMapI hamp; 5513 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5514 MPI_Comm comm; 5515 PetscSF sf,osf; 5516 PetscBool has; 5517 PetscErrorCode ierr; 5518 5519 PetscFunctionBegin; 5520 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5521 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5522 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5523 * and then create a submatrix (that often is an overlapping matrix) 5524 * */ 5525 if (reuse == MAT_INITIAL_MATRIX) { 5526 /* Use a hash table to figure out unique keys */ 5527 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5528 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5529 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5530 count = 0; 5531 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5532 for (i=0;i<a->B->cmap->n;i++) { 5533 key = a->garray[i]/dof; 5534 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5535 if (!has) { 5536 mapping[i] = count; 5537 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5538 } else { 5539 /* Current 'i' has the same value the previous step */ 5540 mapping[i] = count-1; 5541 } 5542 } 5543 ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5544 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5545 if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count); 5546 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5547 off = 0; 5548 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5549 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5550 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5551 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5552 /* In case, the matrix was already created but users want to recreate the matrix */ 5553 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5554 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5555 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5556 ierr = ISDestroy(&map);CHKERRQ(ierr); 5557 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5558 } else if 
(reuse == MAT_REUSE_MATRIX) { 5559 /* If the matrix was already created, we simply update the values using the SF objects 5560 * that were attached to the matrix earlier. 5561 * */ 5562 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5563 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5564 if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet"); 5565 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5566 /* Update values in place */ 5567 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5568 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5569 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5570 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5571 } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type"); 5572 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5573 PetscFunctionReturn(0); 5574 } 5575 5576 /*@C 5577 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A 5578 5579 Collective on Mat 5580 5581 Input Parameters: 5582 + A,B - the matrices in mpiaij format 5583 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5584 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5585 5586 Output Parameters: 5587 + rowb, colb - index sets of rows and columns of B to extract 5588 - B_seq - the sequential matrix generated 5589 5590 Level: developer 5591 5592 @*/ 5593 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5594 { 5595 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5596 PetscErrorCode ierr; 5597 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5598 IS isrowb,iscolb; 5599 Mat *bseq=NULL; 5600 5601 PetscFunctionBegin; 5602 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5603 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5604 } 5605 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5606 5607 if (scall == MAT_INITIAL_MATRIX) { 5608 start = A->cmap->rstart; 5609 cmap = a->garray; 5610 nzA = a->A->cmap->n; 5611 nzB = a->B->cmap->n; 5612 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5613 ncols = 0; 5614 for (i=0; i<nzB; i++) { /* row < local row index */ 5615 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5616 else break; 5617 } 5618 imark = i; 5619 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5620 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5621 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5622 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5623 } else { 5624 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5625 isrowb = *rowb; iscolb = *colb; 5626 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5627 bseq[0] = *B_seq; 5628 } 5629 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5630 *B_seq = bseq[0]; 5631 ierr = PetscFree(bseq);CHKERRQ(ierr); 5632 if (!rowb) { 5633 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5634 } else { 5635 *rowb = isrowb; 5636 } 5637 if (!colb) { 5638 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5639 } else { 5640 *colb = iscolb; 5641 } 5642 ierr
= PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5643 PetscFunctionReturn(0); 5644 } 5645 5646 /* 5647 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns 5648 of the OFF-DIAGONAL portion of local A 5649 5650 Collective on Mat 5651 5652 Input Parameters: 5653 + A,B - the matrices in mpiaij format 5654 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5655 5656 Output Parameters: 5657 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5658 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5659 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5660 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5661 5662 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5663 for this matrix. This is not desirable. 5664 5665 Level: developer 5666 5667 */ 5668 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5669 { 5670 PetscErrorCode ierr; 5671 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5672 Mat_SeqAIJ *b_oth; 5673 VecScatter ctx; 5674 MPI_Comm comm; 5675 const PetscMPIInt *rprocs,*sprocs; 5676 const PetscInt *srow,*rstarts,*sstarts; 5677 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5678 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5679 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5680 MPI_Request *reqs = NULL,*rwaits = NULL,*swaits = NULL; 5681 PetscMPIInt size,tag,rank,nreqs; 5682 5683 PetscFunctionBegin; 5684 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5685 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 5686 5687 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5688 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5689 } 5690 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5691 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 5692 5693 if (size == 1) { 5694 startsj_s = NULL; 5695 bufa_ptr = NULL; 5696 *B_oth = NULL; 5697 PetscFunctionReturn(0); 5698 } 5699 5700 ctx = a->Mvctx; 5701 tag = ((PetscObject)ctx)->tag; 5702 5703 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5704 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5705 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5706 ierr = PetscMPIIntCast(nsends+nrecvs,&nreqs);CHKERRQ(ierr); 5707 ierr = PetscMalloc1(nreqs,&reqs);CHKERRQ(ierr); 5708 rwaits = reqs; 5709 swaits = reqs + nrecvs; 5710 5711 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5712 if (scall == MAT_INITIAL_MATRIX) { 5713 /* i-array */ 5714 /*---------*/ 5715 /* post receives */ 5716 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5717 for (i=0; i<nrecvs; i++) { 5718 rowlen = rvalues + rstarts[i]*rbs; 5719 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5720 ierr =
MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5721 } 5722 5723 /* pack the outgoing message */ 5724 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5725 5726 sstartsj[0] = 0; 5727 rstartsj[0] = 0; 5728 len = 0; /* total length of j or a array to be sent */ 5729 if (nsends) { 5730 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5731 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5732 } 5733 for (i=0; i<nsends; i++) { 5734 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5735 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5736 for (j=0; j<nrows; j++) { 5737 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5738 for (l=0; l<sbs; l++) { 5739 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5740 5741 rowlen[j*sbs+l] = ncols; 5742 5743 len += ncols; 5744 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5745 } 5746 k++; 5747 } 5748 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5749 5750 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5751 } 5752 /* recvs and sends of i-array are completed */ 5753 if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5754 ierr = PetscFree(svalues);CHKERRQ(ierr); 5755 5756 /* allocate buffers for sending j and a arrays */ 5757 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5758 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5759 5760 /* create i-array of B_oth */ 5761 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5762 5763 b_othi[0] = 0; 5764 len = 0; /* total length of j or a array to be received */ 5765 k = 0; 5766 for (i=0; i<nrecvs; i++) { 5767 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5768 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5769 for (j=0; j<nrows; j++) { 5770 b_othi[k+1] = b_othi[k] + rowlen[j]; 5771 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5772 k++; 5773 } 5774 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5775 } 5776 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5777 5778 /* allocate space for j and a arrrays of B_oth */ 5779 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5780 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5781 5782 /* j-array */ 5783 /*---------*/ 5784 /* post receives of j-array */ 5785 for (i=0; i<nrecvs; i++) { 5786 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5787 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5788 } 5789 5790 /* pack the outgoing message j-array */ 5791 if (nsends) k = sstarts[0]; 5792 for (i=0; i<nsends; i++) { 5793 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5794 bufJ = bufj+sstartsj[i]; 5795 for (j=0; j<nrows; j++) { 5796 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5797 for (ll=0; ll<sbs; ll++) { 5798 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5799 for (l=0; l<ncols; l++) { 5800 *bufJ++ = cols[l]; 5801 } 5802 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5803 } 5804 } 5805 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5806 } 5807 5808 /* recvs and sends of j-array are completed */ 5809 if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5810 } else if 
(scall == MAT_REUSE_MATRIX) { 5811 sstartsj = *startsj_s; 5812 rstartsj = *startsj_r; 5813 bufa = *bufa_ptr; 5814 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5815 b_otha = b_oth->a; 5816 #if defined(PETSC_HAVE_DEVICE) 5817 (*B_oth)->offloadmask = PETSC_OFFLOAD_CPU; 5818 #endif 5819 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5820 5821 /* a-array */ 5822 /*---------*/ 5823 /* post receives of a-array */ 5824 for (i=0; i<nrecvs; i++) { 5825 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5826 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5827 } 5828 5829 /* pack the outgoing message a-array */ 5830 if (nsends) k = sstarts[0]; 5831 for (i=0; i<nsends; i++) { 5832 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5833 bufA = bufa+sstartsj[i]; 5834 for (j=0; j<nrows; j++) { 5835 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5836 for (ll=0; ll<sbs; ll++) { 5837 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5838 for (l=0; l<ncols; l++) { 5839 *bufA++ = vals[l]; 5840 } 5841 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5842 } 5843 } 5844 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5845 } 5846 /* recvs and sends of a-array are completed */ 5847 if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5848 ierr = PetscFree(reqs);CHKERRQ(ierr); 5849 5850 if (scall == MAT_INITIAL_MATRIX) { 5851 /* put together the new matrix */ 5852 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5853 5854 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5855 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5856 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5857 b_oth->free_a = PETSC_TRUE; 5858 b_oth->free_ij = PETSC_TRUE; 5859 b_oth->nonew = 0; 5860 5861 ierr = PetscFree(bufj);CHKERRQ(ierr); 5862 if (!startsj_s || !bufa_ptr) { 5863 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5864 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5865 } else { 5866 *startsj_s = sstartsj; 5867 *startsj_r = rstartsj; 5868 *bufa_ptr = bufa; 5869 } 5870 } 5871 5872 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5873 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5874 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5875 PetscFunctionReturn(0); 5876 } 5877 5878 /*@C 5879 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5880 5881 Not Collective 5882 5883 Input Parameters: 5884 . A - The matrix in mpiaij format 5885 5886 Output Parameter: 5887 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5888 . 
colmap - A map from global column index to local index into lvec 5889 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5890 5891 Level: developer 5892 5893 @*/ 5894 #if defined(PETSC_USE_CTABLE) 5895 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5896 #else 5897 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5898 #endif 5899 { 5900 Mat_MPIAIJ *a; 5901 5902 PetscFunctionBegin; 5903 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5904 PetscValidPointer(lvec, 2); 5905 PetscValidPointer(colmap, 3); 5906 PetscValidPointer(multScatter, 4); 5907 a = (Mat_MPIAIJ*) A->data; 5908 if (lvec) *lvec = a->lvec; 5909 if (colmap) *colmap = a->colmap; 5910 if (multScatter) *multScatter = a->Mvctx; 5911 PetscFunctionReturn(0); 5912 } 5913 5914 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5915 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5916 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5917 #if defined(PETSC_HAVE_MKL_SPARSE) 5918 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5919 #endif 5920 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5921 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5922 #if defined(PETSC_HAVE_ELEMENTAL) 5923 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5924 #endif 5925 #if defined(PETSC_HAVE_SCALAPACK) 5926 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 5927 #endif 5928 #if defined(PETSC_HAVE_HYPRE) 5929 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5930 #endif 5931 #if defined(PETSC_HAVE_CUDA) 5932 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 5933 #endif 5934 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 5935 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*); 5936 #endif 5937 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5938 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5939 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 5940 5941 /* 5942 Computes (B'*A')' since computing B*A directly is untenable 5943 5944 n p p 5945 [ ] [ ] [ ] 5946 m [ A ] * n [ B ] = m [ C ] 5947 [ ] [ ] [ ] 5948 5949 */ 5950 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5951 { 5952 PetscErrorCode ierr; 5953 Mat At,Bt,Ct; 5954 5955 PetscFunctionBegin; 5956 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5957 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5958 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr); 5959 ierr = MatDestroy(&At);CHKERRQ(ierr); 5960 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5961 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5962 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5963 PetscFunctionReturn(0); 5964 } 5965 5966 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 5967 { 5968 PetscErrorCode ierr; 5969 PetscBool cisdense; 5970 5971 PetscFunctionBegin; 5972 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5973 ierr = 
MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 5974 ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 5975 ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr); 5976 if (!cisdense) { 5977 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 5978 } 5979 ierr = MatSetUp(C);CHKERRQ(ierr); 5980 5981 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5982 PetscFunctionReturn(0); 5983 } 5984 5985 /* ----------------------------------------------------------------*/ 5986 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 5987 { 5988 Mat_Product *product = C->product; 5989 Mat A = product->A,B=product->B; 5990 5991 PetscFunctionBegin; 5992 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 5993 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5994 5995 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 5996 C->ops->productsymbolic = MatProductSymbolic_AB; 5997 PetscFunctionReturn(0); 5998 } 5999 6000 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6001 { 6002 PetscErrorCode ierr; 6003 Mat_Product *product = C->product; 6004 6005 PetscFunctionBegin; 6006 if (product->type == MATPRODUCT_AB) { 6007 ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr); 6008 } 6009 PetscFunctionReturn(0); 6010 } 6011 /* ----------------------------------------------------------------*/ 6012 6013 /*MC 6014 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6015 6016 Options Database Keys: 6017 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 6018 6019 Level: beginner 6020 6021 Notes: 6022 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values; 6023 in this case the values associated with the rows and columns one passes in are set to zero 6024 in the matrix. 6025 6026 MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type.
In this no 6027 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 6028 6029 .seealso: MatCreateAIJ() 6030 M*/ 6031 6032 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6033 { 6034 Mat_MPIAIJ *b; 6035 PetscErrorCode ierr; 6036 PetscMPIInt size; 6037 6038 PetscFunctionBegin; 6039 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 6040 6041 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 6042 B->data = (void*)b; 6043 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 6044 B->assembled = PETSC_FALSE; 6045 B->insertmode = NOT_SET_VALUES; 6046 b->size = size; 6047 6048 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr); 6049 6050 /* build cache for off array entries formed */ 6051 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 6052 6053 b->donotstash = PETSC_FALSE; 6054 b->colmap = NULL; 6055 b->garray = NULL; 6056 b->roworiented = PETSC_TRUE; 6057 6058 /* stuff used for matrix vector multiply */ 6059 b->lvec = NULL; 6060 b->Mvctx = NULL; 6061 6062 /* stuff for MatGetRow() */ 6063 b->rowindices = NULL; 6064 b->rowvalues = NULL; 6065 b->getrowactive = PETSC_FALSE; 6066 6067 /* flexible pointer used in CUSPARSE classes */ 6068 b->spptr = NULL; 6069 6070 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 6071 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 6072 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 6073 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 6074 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 6075 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 6076 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 6077 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 6078 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 6079 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr); 6080 #if defined(PETSC_HAVE_CUDA) 6081 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr); 6082 #endif 6083 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6084 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr); 6085 #endif 6086 #if defined(PETSC_HAVE_MKL_SPARSE) 6087 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 6088 #endif 6089 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 6090 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr); 6091 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 6092 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);CHKERRQ(ierr); 6093 #if defined(PETSC_HAVE_ELEMENTAL) 6094 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 6095 #endif 6096 #if defined(PETSC_HAVE_SCALAPACK) 6097 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr); 6098 #endif 6099 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 6100 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 6101 #if defined(PETSC_HAVE_HYPRE) 6102 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 6103 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr); 6104 #endif 6105 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr); 6106 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr); 6107 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 6108 PetscFunctionReturn(0); 6109 } 6110 6111 /*@C 6112 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 6113 and "off-diagonal" part of the matrix in CSR format. 6114 6115 Collective 6116 6117 Input Parameters: 6118 + comm - MPI communicator 6119 . m - number of local rows (Cannot be PETSC_DECIDE) 6120 . n - This value should be the same as the local size used in creating the 6121 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 6122 calculated if N is given) For square matrices n is almost always m. 6123 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 6124 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 6125 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6126 . j - column indices 6127 . a - matrix values 6128 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6129 . oj - column indices 6130 - oa - matrix values 6131 6132 Output Parameter: 6133 . mat - the matrix 6134 6135 Level: advanced 6136 6137 Notes: 6138 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6139 must free the arrays once the matrix has been destroyed and not before. 6140 6141 The i and j indices are 0 based 6142 6143 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6144 6145 This sets local rows and cannot be used to set off-processor values. 6146 6147 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6148 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6149 not easily support in-place reassembly. 
It is recommended to use MatSetValues() (or a variant thereof) because 6150 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6151 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6152 communication if it is known that only local entries will be set. 6153 6154 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 6155 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 6156 @*/ 6157 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 6158 { 6159 PetscErrorCode ierr; 6160 Mat_MPIAIJ *maij; 6161 6162 PetscFunctionBegin; 6163 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6164 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 6165 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 6166 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 6167 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 6168 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 6169 maij = (Mat_MPIAIJ*) (*mat)->data; 6170 6171 (*mat)->preallocated = PETSC_TRUE; 6172 6173 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 6174 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 6175 6176 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 6177 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 6178 6179 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6180 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6181 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6182 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6183 6184 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 6185 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6186 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6187 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 6188 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6189 PetscFunctionReturn(0); 6190 } 6191 6192 /* 6193 Special version for direct calls from Fortran 6194 */ 6195 #include <petsc/private/fortranimpl.h> 6196 6197 /* Change these macros so can be used in void function */ 6198 #undef CHKERRQ 6199 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 6200 #undef SETERRQ2 6201 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 6202 #undef SETERRQ3 6203 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 6204 #undef SETERRQ 6205 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 6206 6207 #if defined(PETSC_HAVE_FORTRAN_CAPS) 6208 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 6209 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 6210 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 6211 #else 6212 #endif 6213 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 6214 { 6215 Mat mat = *mmat; 6216 PetscInt m = *mm, n = *mn; 6217 InsertMode addv = *maddv; 6218 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 6219 PetscScalar value; 6220 
PetscErrorCode ierr; 6221 6222 MatCheckPreallocated(mat,1); 6223 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 6224 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 6225 { 6226 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 6227 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 6228 PetscBool roworiented = aij->roworiented; 6229 6230 /* Some Variables required in the macro */ 6231 Mat A = aij->A; 6232 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 6233 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 6234 MatScalar *aa = a->a; 6235 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 6236 Mat B = aij->B; 6237 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 6238 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 6239 MatScalar *ba = b->a; 6240 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 6241 * cannot use "#if defined" inside a macro. */ 6242 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 6243 6244 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 6245 PetscInt nonew = a->nonew; 6246 MatScalar *ap1,*ap2; 6247 6248 PetscFunctionBegin; 6249 for (i=0; i<m; i++) { 6250 if (im[i] < 0) continue; 6251 if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 6252 if (im[i] >= rstart && im[i] < rend) { 6253 row = im[i] - rstart; 6254 lastcol1 = -1; 6255 rp1 = aj + ai[row]; 6256 ap1 = aa + ai[row]; 6257 rmax1 = aimax[row]; 6258 nrow1 = ailen[row]; 6259 low1 = 0; 6260 high1 = nrow1; 6261 lastcol2 = -1; 6262 rp2 = bj + bi[row]; 6263 ap2 = ba + bi[row]; 6264 rmax2 = bimax[row]; 6265 nrow2 = bilen[row]; 6266 low2 = 0; 6267 high2 = nrow2; 6268 6269 for (j=0; j<n; j++) { 6270 if (roworiented) value = v[i*n+j]; 6271 else value = v[i+j*m]; 6272 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 6273 if (in[j] >= cstart && in[j] < cend) { 6274 col = in[j] - cstart; 6275 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 6276 #if defined(PETSC_HAVE_DEVICE) 6277 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 6278 #endif 6279 } else if (in[j] < 0) continue; 6280 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 6281 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */ 6282 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 6283 } else { 6284 if (mat->was_assembled) { 6285 if (!aij->colmap) { 6286 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 6287 } 6288 #if defined(PETSC_USE_CTABLE) 6289 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 6290 col--; 6291 #else 6292 col = aij->colmap[in[j]] - 1; 6293 #endif 6294 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 6295 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 6296 col = in[j]; 6297 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 6298 B = aij->B; 6299 b = (Mat_SeqAIJ*)B->data; 6300 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 6301 rp2 = bj + bi[row]; 6302 ap2 = ba + bi[row]; 6303 rmax2 = bimax[row]; 6304 nrow2 = bilen[row]; 6305 low2 = 
0; 6306 high2 = nrow2; 6307 bm = aij->B->rmap->n; 6308 ba = b->a; 6309 inserted = PETSC_FALSE; 6310 } 6311 } else col = in[j]; 6312 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 6313 #if defined(PETSC_HAVE_DEVICE) 6314 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 6315 #endif 6316 } 6317 } 6318 } else if (!aij->donotstash) { 6319 if (roworiented) { 6320 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6321 } else { 6322 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6323 } 6324 } 6325 } 6326 } 6327 PetscFunctionReturnVoid(); 6328 } 6329 6330 typedef struct { 6331 Mat *mp; /* intermediate products */ 6332 PetscBool *mptmp; /* is the intermediate product temporary ? */ 6333 PetscInt cp; /* number of intermediate products */ 6334 6335 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6336 PetscInt *startsj_s,*startsj_r; 6337 PetscScalar *bufa; 6338 Mat P_oth; 6339 6340 /* may take advantage of merging product->B */ 6341 Mat Bloc; /* B-local by merging diag and off-diag */ 6342 6343 /* cusparse does not have support to split between symbolic and numeric phases. 6344 When api_user is true, we don't need to update the numerical values 6345 of the temporary storage */ 6346 PetscBool reusesym; 6347 6348 /* support for COO values insertion */ 6349 PetscScalar *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6350 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6351 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6352 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. 
AtB or PtAP) */ 6353 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6354 PetscMemType mtype; 6355 6356 /* customization */ 6357 PetscBool abmerge; 6358 PetscBool P_oth_bind; 6359 } MatMatMPIAIJBACKEND; 6360 6361 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6362 { 6363 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data; 6364 PetscInt i; 6365 PetscErrorCode ierr; 6366 6367 PetscFunctionBegin; 6368 ierr = PetscFree2(mmdata->startsj_s,mmdata->startsj_r);CHKERRQ(ierr); 6369 ierr = PetscFree(mmdata->bufa);CHKERRQ(ierr); 6370 ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v);CHKERRQ(ierr); 6371 ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w);CHKERRQ(ierr); 6372 ierr = MatDestroy(&mmdata->P_oth);CHKERRQ(ierr); 6373 ierr = MatDestroy(&mmdata->Bloc);CHKERRQ(ierr); 6374 ierr = PetscSFDestroy(&mmdata->sf);CHKERRQ(ierr); 6375 for (i = 0; i < mmdata->cp; i++) { 6376 ierr = MatDestroy(&mmdata->mp[i]);CHKERRQ(ierr); 6377 } 6378 ierr = PetscFree2(mmdata->mp,mmdata->mptmp);CHKERRQ(ierr); 6379 ierr = PetscFree(mmdata->own[0]);CHKERRQ(ierr); 6380 ierr = PetscFree(mmdata->own);CHKERRQ(ierr); 6381 ierr = PetscFree(mmdata->off[0]);CHKERRQ(ierr); 6382 ierr = PetscFree(mmdata->off);CHKERRQ(ierr); 6383 ierr = PetscFree(mmdata);CHKERRQ(ierr); 6384 PetscFunctionReturn(0); 6385 } 6386 6387 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 6388 { 6389 PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]); 6390 PetscErrorCode ierr; 6391 6392 PetscFunctionBegin; 6393 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f);CHKERRQ(ierr); 6394 if (f) { 6395 ierr = (*f)(A,n,idx,v);CHKERRQ(ierr); 6396 } else { 6397 const PetscScalar *vv; 6398 6399 ierr = MatSeqAIJGetArrayRead(A,&vv);CHKERRQ(ierr); 6400 if (n && idx) { 6401 PetscScalar *w = v; 6402 const PetscInt *oi = idx; 6403 PetscInt j; 6404 6405 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 6406 } else { 6407 ierr = PetscArraycpy(v,vv,n);CHKERRQ(ierr); 6408 } 6409 ierr = MatSeqAIJRestoreArrayRead(A,&vv);CHKERRQ(ierr); 6410 } 6411 PetscFunctionReturn(0); 6412 } 6413 6414 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 6415 { 6416 MatMatMPIAIJBACKEND *mmdata; 6417 PetscInt i,n_d,n_o; 6418 PetscErrorCode ierr; 6419 6420 PetscFunctionBegin; 6421 MatCheckProduct(C,1); 6422 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 6423 mmdata = (MatMatMPIAIJBACKEND*)C->product->data; 6424 if (!mmdata->reusesym) { /* update temporary matrices */ 6425 if (mmdata->P_oth) { 6426 ierr = MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 6427 } 6428 if (mmdata->Bloc) { 6429 ierr = MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc);CHKERRQ(ierr); 6430 } 6431 } 6432 mmdata->reusesym = PETSC_FALSE; 6433 6434 for (i = 0; i < mmdata->cp; i++) { 6435 if (!mmdata->mp[i]->ops->productnumeric) SETERRQ1(PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]); 6436 ierr = (*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]);CHKERRQ(ierr); 6437 } 6438 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 6439 PetscInt noff = mmdata->off[i+1] - mmdata->off[i]; 6440 6441 if (mmdata->mptmp[i]) continue; 6442 if (noff) { 6443 PetscInt nown = mmdata->own[i+1] - mmdata->own[i]; 6444 6445 ierr = 
MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o);CHKERRQ(ierr); 6446 ierr = MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d);CHKERRQ(ierr); 6447 n_o += noff; 6448 n_d += nown; 6449 } else { 6450 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data; 6451 6452 ierr = MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d);CHKERRQ(ierr); 6453 n_d += mm->nz; 6454 } 6455 } 6456 if (mmdata->hasoffproc) { /* offprocess insertion */ 6457 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr); 6458 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr); 6459 } 6460 ierr = MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES);CHKERRQ(ierr); 6461 PetscFunctionReturn(0); 6462 } 6463 6464 /* Support for Pt * A, A * P, or Pt * A * P */ 6465 #define MAX_NUMBER_INTERMEDIATE 4 6466 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 6467 { 6468 Mat_Product *product = C->product; 6469 Mat A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 6470 Mat_MPIAIJ *a,*p; 6471 MatMatMPIAIJBACKEND *mmdata; 6472 ISLocalToGlobalMapping P_oth_l2g = NULL; 6473 IS glob = NULL; 6474 const char *prefix; 6475 char pprefix[256]; 6476 const PetscInt *globidx,*P_oth_idx; 6477 PetscInt i,j,cp,m,n,M,N,ncoo,ncoo_d,ncoo_o,ncoo_oown,*coo_i,*coo_j; 6478 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */ 6479 /* type-0: consecutive, start from 0; type-1: consecutive with */ 6480 /* a base offset; type-2: sparse with a local to global map table */ 6481 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 6482 6483 MatProductType ptype; 6484 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk; 6485 PetscMPIInt size; 6486 PetscErrorCode ierr; 6487 6488 PetscFunctionBegin; 6489 MatCheckProduct(C,1); 6490 if (product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 6491 ptype = product->type; 6492 if (product->A->symmetric && ptype == MATPRODUCT_AtB) ptype = MATPRODUCT_AB; 6493 switch (ptype) { 6494 case MATPRODUCT_AB: 6495 A = product->A; 6496 P = product->B; 6497 m = A->rmap->n; 6498 n = P->cmap->n; 6499 M = A->rmap->N; 6500 N = P->cmap->N; 6501 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 6502 break; 6503 case MATPRODUCT_AtB: 6504 P = product->A; 6505 A = product->B; 6506 m = P->cmap->n; 6507 n = A->cmap->n; 6508 M = P->cmap->N; 6509 N = A->cmap->N; 6510 hasoffproc = PETSC_TRUE; 6511 break; 6512 case MATPRODUCT_PtAP: 6513 A = product->A; 6514 P = product->B; 6515 m = P->cmap->n; 6516 n = P->cmap->n; 6517 M = P->cmap->N; 6518 N = P->cmap->N; 6519 hasoffproc = PETSC_TRUE; 6520 break; 6521 default: 6522 SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 6523 } 6524 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)C),&size);CHKERRMPI(ierr); 6525 if (size == 1) hasoffproc = PETSC_FALSE; 6526 6527 /* defaults */ 6528 for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) { 6529 mp[i] = NULL; 6530 mptmp[i] = PETSC_FALSE; 6531 rmapt[i] = -1; 6532 cmapt[i] = -1; 6533 rmapa[i] = NULL; 6534 cmapa[i] = NULL; 6535 } 6536 6537 /* customization */ 6538 ierr = PetscNew(&mmdata);CHKERRQ(ierr); 6539 mmdata->reusesym = product->api_user; 6540 if (ptype == MATPRODUCT_AB) { 6541 if 
(product->api_user) { 6542 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr); 6543 ierr = PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr); 6544 ierr = PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6545 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6546 } else { 6547 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr); 6548 ierr = PetscOptionsBool("-matproduct_ab_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr); 6549 ierr = PetscOptionsBool("-matproduct_ab_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6550 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6551 } 6552 } else if (ptype == MATPRODUCT_PtAP) { 6553 if (product->api_user) { 6554 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr); 6555 ierr = PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6556 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6557 } else { 6558 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr); 6559 ierr = PetscOptionsBool("-matproduct_ptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6560 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6561 } 6562 } 6563 a = (Mat_MPIAIJ*)A->data; 6564 p = (Mat_MPIAIJ*)P->data; 6565 ierr = MatSetSizes(C,m,n,M,N);CHKERRQ(ierr); 6566 ierr = PetscLayoutSetUp(C->rmap);CHKERRQ(ierr); 6567 ierr = PetscLayoutSetUp(C->cmap);CHKERRQ(ierr); 6568 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 6569 ierr = MatGetOptionsPrefix(C,&prefix);CHKERRQ(ierr); 6570 6571 cp = 0; 6572 switch (ptype) { 6573 case MATPRODUCT_AB: /* A * P */ 6574 ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 6575 6576 /* A_diag * P_local (merged or not) */ 6577 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 6578 /* P is product->B */ 6579 ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 6580 ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6581 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6582 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6583 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6584 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6585 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6586 mp[cp]->product->api_user = product->api_user; 6587 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6588 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6589 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6590 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6591 rmapt[cp] = 1; 6592 cmapt[cp] = 2; 6593 cmapa[cp] = globidx; 6594 mptmp[cp] = PETSC_FALSE; 6595 cp++; 
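/* This product (a->A * Bloc) contributes rows that are C's locally owned rows, hence rmapt[] = 1 (consecutive with
   base C->rmap->rstart); its columns are in Bloc's merged-local ordering and are translated back to global column
   indices through glob, hence cmapt[] = 2 with cmapa[] = globidx */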
6596 } else { /* A_diag * P_diag and A_diag * P_off */ 6597 ierr = MatProductCreate(a->A,p->A,NULL,&mp[cp]);CHKERRQ(ierr); 6598 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6599 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6600 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6601 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6602 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6603 mp[cp]->product->api_user = product->api_user; 6604 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6605 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6606 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6607 rmapt[cp] = 1; 6608 cmapt[cp] = 1; 6609 mptmp[cp] = PETSC_FALSE; 6610 cp++; 6611 ierr = MatProductCreate(a->A,p->B,NULL,&mp[cp]);CHKERRQ(ierr); 6612 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6613 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6614 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6615 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6616 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6617 mp[cp]->product->api_user = product->api_user; 6618 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6619 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6620 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6621 rmapt[cp] = 1; 6622 cmapt[cp] = 2; 6623 cmapa[cp] = p->garray; 6624 mptmp[cp] = PETSC_FALSE; 6625 cp++; 6626 } 6627 6628 /* A_off * P_other */ 6629 if (mmdata->P_oth) { 6630 ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); /* make P_oth use local col ids */ 6631 ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 6632 ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr); 6633 ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr); 6634 ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr); 6635 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6636 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6637 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6638 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6639 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6640 mp[cp]->product->api_user = product->api_user; 6641 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6642 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6643 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6644 rmapt[cp] = 1; 6645 cmapt[cp] = 2; 6646 cmapa[cp] = P_oth_idx; 6647 mptmp[cp] = PETSC_FALSE; 6648 cp++; 6649 } 6650 break; 6651 6652 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 6653 /* A is product->B */ 6654 ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 6655 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 6656 ierr = MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6657 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 
6658 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6659 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6660 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6661 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6662 mp[cp]->product->api_user = product->api_user; 6663 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6664 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6665 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6666 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6667 rmapt[cp] = 2; 6668 rmapa[cp] = globidx; 6669 cmapt[cp] = 2; 6670 cmapa[cp] = globidx; 6671 mptmp[cp] = PETSC_FALSE; 6672 cp++; 6673 } else { 6674 ierr = MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6675 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6676 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6677 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6678 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6679 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6680 mp[cp]->product->api_user = product->api_user; 6681 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6682 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6683 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6684 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6685 rmapt[cp] = 1; 6686 cmapt[cp] = 2; 6687 cmapa[cp] = globidx; 6688 mptmp[cp] = PETSC_FALSE; 6689 cp++; 6690 ierr = MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6691 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6692 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6693 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6694 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6695 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6696 mp[cp]->product->api_user = product->api_user; 6697 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6698 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6699 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6700 rmapt[cp] = 2; 6701 rmapa[cp] = p->garray; 6702 cmapt[cp] = 2; 6703 cmapa[cp] = globidx; 6704 mptmp[cp] = PETSC_FALSE; 6705 cp++; 6706 } 6707 break; 6708 case MATPRODUCT_PtAP: 6709 ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 6710 /* P is product->B */ 6711 ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 6712 ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6713 ierr = MatProductSetType(mp[cp],MATPRODUCT_PtAP);CHKERRQ(ierr); 6714 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6715 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6716 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6717 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6718 mp[cp]->product->api_user = product->api_user; 6719 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6720 if 
(!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6721 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6722 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6723 rmapt[cp] = 2; 6724 rmapa[cp] = globidx; 6725 cmapt[cp] = 2; 6726 cmapa[cp] = globidx; 6727 mptmp[cp] = PETSC_FALSE; 6728 cp++; 6729 if (mmdata->P_oth) { 6730 ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); 6731 ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 6732 ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr); 6733 ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr); 6734 ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr); 6735 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6736 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6737 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6738 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6739 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6740 mp[cp]->product->api_user = product->api_user; 6741 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6742 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6743 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6744 mptmp[cp] = PETSC_TRUE; 6745 cp++; 6746 ierr = MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]);CHKERRQ(ierr); 6747 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6748 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6749 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6750 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6751 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6752 mp[cp]->product->api_user = product->api_user; 6753 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6754 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6755 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6756 rmapt[cp] = 2; 6757 rmapa[cp] = globidx; 6758 cmapt[cp] = 2; 6759 cmapa[cp] = P_oth_idx; 6760 mptmp[cp] = PETSC_FALSE; 6761 cp++; 6762 } 6763 break; 6764 default: 6765 SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 6766 } 6767 /* sanity check */ 6768 if (size > 1) for (i = 0; i < cp; i++) if (rmapt[i] == 2 && !hasoffproc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %D",i); 6769 6770 ierr = PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp);CHKERRQ(ierr); 6771 for (i = 0; i < cp; i++) { 6772 mmdata->mp[i] = mp[i]; 6773 mmdata->mptmp[i] = mptmp[i]; 6774 } 6775 mmdata->cp = cp; 6776 C->product->data = mmdata; 6777 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 6778 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 6779 6780 /* memory type */ 6781 mmdata->mtype = PETSC_MEMTYPE_HOST; 6782 ierr = PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");CHKERRQ(ierr); 6783 ierr = PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"");CHKERRQ(ierr); 6784 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 6785 
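/* mmdata->mtype is the memory type later passed to PetscSFMalloc()/PetscSFFree() for the COO value buffers
   coo_v and coo_w, so selecting PETSC_MEMTYPE_CUDA here keeps those buffers in device memory for the CUDA backend */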
// enable the line below when MatSeqAIJCopySubArray_SeqAIJKokkos is implemented 6786 //else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_DEVICE; 6787 6788 /* prepare coo coordinates for values insertion */ 6789 6790 /* count total nonzeros of those intermediate seqaij Mats 6791 ncoo_d: # of nonzeros of matrices that do not have offproc entries 6792 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 6793 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 6794 */ 6795 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 6796 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 6797 if (mptmp[cp]) continue; 6798 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */ 6799 const PetscInt *rmap = rmapa[cp]; 6800 const PetscInt mr = mp[cp]->rmap->n; 6801 const PetscInt rs = C->rmap->rstart; 6802 const PetscInt re = C->rmap->rend; 6803 const PetscInt *ii = mm->i; 6804 for (i = 0; i < mr; i++) { 6805 const PetscInt gr = rmap[i]; 6806 const PetscInt nz = ii[i+1] - ii[i]; 6807 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 6808 else ncoo_oown += nz; /* this row is local */ 6809 } 6810 } else ncoo_d += mm->nz; 6811 } 6812 6813 /* 6814 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 6815 6816 ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted to me by other procs. 6817 6818 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 6819 6820 off[p]: points to the segment for matrix mp[p], storing locations of nonzeros that mp[p] will insert to others 6821 own[p]: points to the segment for matrix mp[p], storing locations of nonzeros that mp[p] will insert locally 6822 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 6823 6824 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 6825 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of the nonzeros I will receive.
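     In the numeric phase (MatProductNumeric_MPIAIJBACKEND above), off[p] and own[p] are used as index lists into
     mp[p]'s value array: entries at off[p][] are packed into the send buffer coo_w, entries at own[p][] (or all of
     mp[p]'s entries when it has no offproc rows) are copied into coo_v, the remote contributions are then gathered
     onto the tail of coo_v with the SF, and the whole coo_v is passed to MatSetValuesCOO().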
6826 */ 6827 ierr = PetscCalloc1(mmdata->cp+1,&mmdata->off);CHKERRQ(ierr); /* +1 to make a csr-like data structure */ 6828 ierr = PetscCalloc1(mmdata->cp+1,&mmdata->own);CHKERRQ(ierr); 6829 6830 /* gather (i,j) of nonzeros inserted by remote procs */ 6831 if (hasoffproc) { 6832 PetscSF msf; 6833 PetscInt ncoo2,*coo_i2,*coo_j2; 6834 6835 ierr = PetscMalloc1(ncoo_o,&mmdata->off[0]);CHKERRQ(ierr); 6836 ierr = PetscMalloc1(ncoo_oown,&mmdata->own[0]);CHKERRQ(ierr); 6837 ierr = PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j);CHKERRQ(ierr); /* to collect (i,j) of entries to be sent to others */ 6838 6839 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 6840 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 6841 PetscInt *idxoff = mmdata->off[cp]; 6842 PetscInt *idxown = mmdata->own[cp]; 6843 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 6844 const PetscInt *rmap = rmapa[cp]; 6845 const PetscInt *cmap = cmapa[cp]; 6846 const PetscInt *ii = mm->i; 6847 PetscInt *coi = coo_i + ncoo_o; 6848 PetscInt *coj = coo_j + ncoo_o; 6849 const PetscInt mr = mp[cp]->rmap->n; 6850 const PetscInt rs = C->rmap->rstart; 6851 const PetscInt re = C->rmap->rend; 6852 const PetscInt cs = C->cmap->rstart; 6853 for (i = 0; i < mr; i++) { 6854 const PetscInt *jj = mm->j + ii[i]; 6855 const PetscInt gr = rmap[i]; 6856 const PetscInt nz = ii[i+1] - ii[i]; 6857 if (gr < rs || gr >= re) { /* this is an offproc row */ 6858 for (j = ii[i]; j < ii[i+1]; j++) { 6859 *coi++ = gr; 6860 *idxoff++ = j; 6861 } 6862 if (!cmapt[cp]) { /* already global */ 6863 for (j = 0; j < nz; j++) *coj++ = jj[j]; 6864 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 6865 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 6866 } else { /* offdiag */ 6867 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 6868 } 6869 ncoo_o += nz; 6870 } else { /* this is a local row */ 6871 for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j; 6872 } 6873 } 6874 } 6875 mmdata->off[cp + 1] = idxoff; 6876 mmdata->own[cp + 1] = idxown; 6877 } 6878 6879 ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr); 6880 ierr = PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i);CHKERRQ(ierr); 6881 ierr = PetscSFGetMultiSF(mmdata->sf,&msf);CHKERRQ(ierr); 6882 ierr = PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL);CHKERRQ(ierr); 6883 ncoo = ncoo_d + ncoo_oown + ncoo2; 6884 ierr = PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2);CHKERRQ(ierr); 6885 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); /* put (i,j) of remote nonzeros at back */ 6886 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 6887 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 6888 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 6889 ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr); 6890 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 6891 ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w);CHKERRQ(ierr); 6892 coo_i = coo_i2; 6893 coo_j = coo_j2; 6894 } else { /* no offproc values insertion */ 6895 ncoo = ncoo_d; 6896 ierr = PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j);CHKERRQ(ierr); 6897 6898 ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr); 6899 ierr = PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER);CHKERRQ(ierr); 6900 
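/* even without off-process insertion an SF (with an empty graph) is created, so that the PetscSFMalloc()/PetscSFFree()
   calls that manage coo_v can be used unconditionally */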
ierr = PetscSFSetUp(mmdata->sf);CHKERRQ(ierr); 6901 } 6902 mmdata->hasoffproc = hasoffproc; 6903 6904 /* gather (i,j) of nonzeros inserted locally */ 6905 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 6906 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 6907 PetscInt *coi = coo_i + ncoo_d; 6908 PetscInt *coj = coo_j + ncoo_d; 6909 const PetscInt *jj = mm->j; 6910 const PetscInt *ii = mm->i; 6911 const PetscInt *cmap = cmapa[cp]; 6912 const PetscInt *rmap = rmapa[cp]; 6913 const PetscInt mr = mp[cp]->rmap->n; 6914 const PetscInt rs = C->rmap->rstart; 6915 const PetscInt re = C->rmap->rend; 6916 const PetscInt cs = C->cmap->rstart; 6917 6918 if (mptmp[cp]) continue; 6919 if (rmapt[cp] == 1) { /* consecutive rows */ 6920 /* fill coo_i */ 6921 for (i = 0; i < mr; i++) { 6922 const PetscInt gr = i + rs; 6923 for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr; 6924 } 6925 /* fill coo_j */ 6926 if (!cmapt[cp]) { /* type-0, already global */ 6927 ierr = PetscArraycpy(coj,jj,mm->nz);CHKERRQ(ierr); 6928 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 6929 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 6930 } else { /* type-2, local to global for sparse columns */ 6931 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 6932 } 6933 ncoo_d += mm->nz; 6934 } else if (rmapt[cp] == 2) { /* sparse rows */ 6935 for (i = 0; i < mr; i++) { 6936 const PetscInt *jj = mm->j + ii[i]; 6937 const PetscInt gr = rmap[i]; 6938 const PetscInt nz = ii[i+1] - ii[i]; 6939 if (gr >= rs && gr < re) { /* local rows */ 6940 for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr; 6941 if (!cmapt[cp]) { /* type-0, already global */ 6942 for (j = 0; j < nz; j++) *coj++ = jj[j]; 6943 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 6944 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 6945 } else { /* type-2, local to global for sparse columns */ 6946 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 6947 } 6948 ncoo_d += nz; 6949 } 6950 } 6951 } 6952 } 6953 if (glob) { 6954 ierr = ISRestoreIndices(glob,&globidx);CHKERRQ(ierr); 6955 } 6956 ierr = ISDestroy(&glob);CHKERRQ(ierr); 6957 if (P_oth_l2g) { 6958 ierr = ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 6959 } 6960 ierr = ISLocalToGlobalMappingDestroy(&P_oth_l2g);CHKERRQ(ierr); 6961 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 6962 ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v);CHKERRQ(ierr); 6963 6964 /* preallocate with COO data */ 6965 ierr = MatSetPreallocationCOO(C,ncoo,coo_i,coo_j);CHKERRQ(ierr); 6966 ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr); 6967 PetscFunctionReturn(0); 6968 } 6969 6970 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 6971 { 6972 Mat_Product *product = mat->product; 6973 PetscErrorCode ierr; 6974 #if defined(PETSC_HAVE_DEVICE) 6975 PetscBool match = PETSC_FALSE; 6976 PetscBool usecpu = PETSC_FALSE; 6977 #else 6978 PetscBool match = PETSC_TRUE; 6979 #endif 6980 6981 PetscFunctionBegin; 6982 MatCheckProduct(mat,1); 6983 #if defined(PETSC_HAVE_DEVICE) 6984 if (!product->A->boundtocpu && !product->B->boundtocpu) { 6985 ierr = PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match);CHKERRQ(ierr); 6986 } 6987 if (match) { /* we can always fallback to the CPU if requested */ 6988 switch (product->type) { 6989 case MATPRODUCT_AB: 6990 if (product->api_user) { 6991 ierr = 
PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");CHKERRQ(ierr); 6992 ierr = PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 6993 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6994 } else { 6995 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr); 6996 ierr = PetscOptionsBool("-matproduct_ab_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 6997 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6998 } 6999 break; 7000 case MATPRODUCT_AtB: 7001 if (product->api_user) { 7002 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr); 7003 ierr = PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7004 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7005 } else { 7006 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr); 7007 ierr = PetscOptionsBool("-matproduct_atb_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7008 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7009 } 7010 break; 7011 case MATPRODUCT_PtAP: 7012 if (product->api_user) { 7013 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");CHKERRQ(ierr); 7014 ierr = PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7015 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7016 } else { 7017 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr); 7018 ierr = PetscOptionsBool("-matproduct_ptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7019 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7020 } 7021 break; 7022 default: 7023 break; 7024 } 7025 match = (PetscBool)!usecpu; 7026 } 7027 #endif 7028 if (match) { 7029 switch (product->type) { 7030 case MATPRODUCT_AB: 7031 case MATPRODUCT_AtB: 7032 case MATPRODUCT_PtAP: 7033 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7034 break; 7035 default: 7036 break; 7037 } 7038 } 7039 /* fallback to MPIAIJ ops */ 7040 if (!mat->ops->productsymbolic) { 7041 ierr = MatProductSetFromOptions_MPIAIJ(mat);CHKERRQ(ierr); 7042 } 7043 PetscFunctionReturn(0); 7044 } 7045
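/*
   A minimal usage sketch (not part of this file): how the MPIAIJBACKEND product routines above are reached through
   the public MatProduct API. The matrices A and P and their assembly are assumed, and this path is only taken for
   AIJ subclasses that register MatProductSetFromOptions_MPIAIJBACKEND (for example the CUSPARSE or Kokkos
   subclasses); otherwise MatProductSetFromOptions_MPIAIJ() is used as the fallback above.

     Mat            C;
     PetscErrorCode ierr;

     ierr = MatProductCreate(A,P,NULL,&C);CHKERRQ(ierr);          // C will hold P^T*A*P
     ierr = MatProductSetType(C,MATPRODUCT_PtAP);CHKERRQ(ierr);
     ierr = MatProductSetFill(C,PETSC_DEFAULT);CHKERRQ(ierr);
     ierr = MatProductSetFromOptions(C);CHKERRQ(ierr);            // honors -matproduct_ptap_backend_cpu (or -matptap_backend_cpu via MatPtAP())
     ierr = MatProductSymbolic(C);CHKERRQ(ierr);                  // builds the intermediate products and the COO pattern
     ierr = MatProductNumeric(C);CHKERRQ(ierr);                   // can be called again after the values of A or P change
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/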