#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
  for communicators controlling multiple processes.  It is recommended that you call both of
  the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes:
    Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when
   enough exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
  for communicators controlling multiple processes.  It is recommended that you call both of
  the above preallocation routines for simplicity.

   Options Database Keys:
.
 -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
M*/

/* Propagate a CPU-binding request to both the diagonal (A) and off-diagonal (B) sequential blocks. */
static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  /* the flag on the parallel matrix is only meaningful for GPU builds */
  A->boundtocpu = flg;
#endif
  if (a->A) {
    ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
  }
  if (a->B) {
    ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Forward block sizes to the sequential blocks; the off-diagonal block B always gets column block size 1
   because its columns are a compressed selection of nonlocal columns. */
PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Build an IS of the locally owned rows that contain at least one (stored and numerically nonzero) entry.
   If no process has an empty/zero row, *keptrows is left NULL to signal "keep everything". */
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb,*aav,*bav;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia   = a->i;
  ib   = b->i;
  ierr = MatSeqAIJGetArrayRead(mat->A,&aav);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(mat->B,&bav);CHKERRQ(ierr);
  /* first pass: count rows that are entirely zero (cnt) */
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = bav + ib[i];
    for (j=0; j <nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  /* n0rows = global number of zero rows; if none, keep *keptrows == NULL */
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRMPI(ierr);
  if (!n0rows) {
    ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  /* second pass: collect global indices of the kept (nonzero) rows */
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j=0; j<na;j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Set the diagonal of Y from D. When row/column layouts match, the whole diagonal lives in the
   local diagonal block aij->A, so we can delegate; otherwise fall back to the generic path. */
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*) Y->data;
  PetscBool      cong;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
  if (Y->assembled && cong) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Return an IS of the global indices of locally owned rows whose diagonal entry is zero.
   The diagonal entries live entirely in the local block aij->A (square-layout assumption — inherited
   from the SeqAIJ helper; NOTE(review): verify for non-congruent layouts). */
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;  /* convert local row numbers to global */
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Compute per-column norms (1, 2, or infinity) of the parallel matrix.
   Each rank accumulates contributions into a length-N work array (N = global column count, so this
   is not memory-scalable), then a single Allreduce (MAX for infinity, SUM otherwise) combines them. */
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode    ierr;
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
  PetscInt          i,n,*garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  /* get/restore pairs force a device-to-host sync so the raw a_aij->a/b_aij->a reads below are current */
  ierr = MatSeqAIJGetArrayRead(aij->A,&dummy);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(aij->A,&dummy);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(aij->B,&dummy);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(aij->B,&dummy);CHKERRQ(ierr);
  if (type == NORM_2) {
    /* |a*a| == |a|^2 also for complex scalars */
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }

  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

/* Return an IS (in global numbering) of locally owned rows that have an entry outside the
   block diagonal: the union of off-block-diagonal rows of A and the nonzero rows of B. */
PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  PetscErrorCode ierr;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  /* merge the two (local-numbering) index lists, sort, and drop duplicates */
  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
   a slightly higher hash table cost; without it it is not scalable (each processor
   has an order N integer array) but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (n && !aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* hash-table colmap: stores global+1 -> local+1 so that 0 can mean "absent" */
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  /* dense colmap: entry is local+1, zero-initialized so 0 means "absent" */
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

/*
   Insert/add one (row,col,value) into the diagonal block.  Expands in the caller's scope and
   relies on the caller's locals (rp1/ap1/low1/high1/nrow1/lastcol1/aimax/ailen/nonew/...):
   binary-search-assisted scan of the sorted row, then either update in place or shift the row
   and insert, reallocating via MatSeqXAIJReallocateAIJ if the row is full.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
{ \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } \
        else ap1[_i] = value; \
        inserted = PETSC_TRUE; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++;\
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

/* Same insertion logic as MatSetValues_SeqAIJ_A_Private, but for the off-diagonal block B
   (uses the caller's rp2/ap2/low2/high2/... locals and the b_noinsert label). */
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } \
        else ap2[_i] = value; \
        inserted = PETSC_TRUE; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
  }

/* Overwrite one fully-populated, locally owned row with the values v (given in global column order).
   v is split into: B entries left of the diagonal block, then all A entries, then the remaining B entries. */
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr);
  row  = row - diag;  /* convert to local row number */
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
#if defined(PETSC_HAVE_DEVICE)
  /* host copy was modified, so mark the device copy stale */
  if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}

/* Insert/add an m x n logically dense block of values at global rows im[] and columns in[].
   Locally owned rows are routed to the diagonal block A or off-diagonal block B via the insertion
   macros; off-process rows are stashed for communication during assembly.  If a new off-diagonal
   column appears after the matrix was assembled (and new nonzeros are allowed), the matrix is
   disassembled and B's working pointers are re-fetched before continuing. */
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value = 0.0;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A     = aij->A;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B     = aij->B;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *aa,*ba;
  /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
   * cannot use "#if defined" inside a macro. */
  PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_DEVICE)
  /* force a device-to-host copy before touching a->a / b->a directly below */
  if (A->offloadmask == PETSC_OFFLOAD_GPU) {
    const PetscScalar *dummy;
    ierr = MatSeqAIJGetArrayRead(A,&dummy);CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArrayRead(A,&dummy);CHKERRQ(ierr);
  }
  if (B->offloadmask == PETSC_OFFLOAD_GPU) {
    const PetscScalar *dummy;
    ierr = MatSeqAIJGetArrayRead(B,&dummy);CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArrayRead(B,&dummy);CHKERRQ(ierr);
  }
#endif
  aa = a->a;
  ba = b->a;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;  /* negative rows are silently ignored */
    if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the per-row search state used by both insertion macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) value = roworiented ? v[i*n+j] : v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* column owned locally -> diagonal block A */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_DEVICE)
          if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        } else if (in[j] < 0) continue;
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
        else {
          /* nonlocal column -> off-diagonal block B */
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              /* brand-new off-diagonal column: disassemble so B goes back to global column numbering */
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
              inserted = PETSC_FALSE;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j];  /* not yet assembled: B still uses global column numbering */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_DEVICE)
          if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        }
      }
    } else {
      /* off-process row: stash for the assembly communication phase */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ*)mat->data;
  Mat        A     = aij->A; /* diagonal part of the matrix */
  Mat        B     = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a    = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am     = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz;
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;  /* diagonal block stores local column numbers */
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];           /* off-diagonal block keeps global numbers pre-assembly */
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ  *aij  = (Mat_MPIAIJ*)mat->data;
  Mat         A     = aij->A; /* diagonal part of the matrix */
  Mat         B     = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ  *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ  *a    = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ  *b    = (Mat_SeqAIJ*)B->data;
  PetscInt    cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt    *ailen = a->ilen,*aj = a->j;
  PetscInt    *bilen = b->ilen,*bj = b->j;
  PetscInt    am     = aij->A->rmap->n,j;
  PetscInt    *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt    col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
  PetscScalar *aa = a->a,*ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag+dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd+onz_row] = mat_j[col];
        ba[rowstart_offd+onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(0);
}

/* Retrieve individual entries; only locally owned rows are supported.  Local columns are read from
   the diagonal block A; nonlocal columns are looked up through colmap/garray in the off-diagonal
   block B, and entries not stored there are returned as 0.0. */
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* garray check guards against a stale/aliased colmap slot */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

/* Start assembly: initiate communication of the stashed off-process entries.
   A no-op when stashing is disabled or the user promised no off-process entries. */
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Finish assembly: drain the stash into local blocks, handle collective disassembly,
   build the scatter/colmap machinery on first final assembly, assemble A and B, and
   reduce the global nonzero state.  Collective — the Allreduces must run on every rank. */
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
  }
#endif
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
    if (mat->was_assembled && !other_disassembled) {
#if defined(PETSC_HAVE_DEVICE)
      aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
#endif
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  /* cached row data and the cached diagonal become stale after assembly */
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = NULL;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}

/* Zero all stored entries (preserving the nonzero pattern) of both blocks. */
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Zero the given global rows, optionally placing `diag` on the diagonal and fixing up b = diag*x
   for those rows.  Collective; ends with a full assembly and a reduction of the nonzero state. */
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;
  PetscErrorCode   ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }

  /* remember pre-zeroing nonzero states so we can detect a pattern change below */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* congruent layout: the diagonal entry lives in the local block, delegate directly */
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;
    PetscBool  nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
      aijA->nonew = 0;  /* temporarily allow new nonzeros so the diagonal can be inserted */
    }
    if (!nnzB) {
      ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;  /* no diagonal position exists for rows beyond the column range */
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    aijA->nonew = nnwA;  /* restore the original insertion policy */
    aijB->nonew = nnwB;
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}

/* Zero the given global rows AND the corresponding columns, optionally fixing up b for eliminated
   columns.  Uses a PetscSF to map the (possibly off-process) row list to owned rows, delegates the
   diagonal block to MatZeroRowsColumns, and masks eliminated columns out of the off-diagonal block. */
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscMPIInt       p = 0;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;  /* mark eliminated rows; the scatter turns this into a ghost-column mask */
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* NOTE(review): bb/xx are only fetched when both x and b are given — confirm b implies x here */
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  }
  PetscFunctionReturn(0);
}

/*
  MatMult_MPIAIJ - y = A*x. Overlaps the ghost-value scatter with the
  diagonal-block multiply, then adds the off-diagonal contribution.
*/
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  /* start bringing in the ghost values of x, multiply by the diagonal block meanwhile */
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Apply only the local diagonal block: xx = diag(A)*bb (delegates to a->A). */
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* z = y + A*x, with the same communication/computation overlap as MatMult_MPIAIJ. */
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* y = A^T*x: local transposes of both blocks, then a reverse scatter adds the
   off-diagonal partial sums into the owning ranks. */
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  MatIsTranspose_MPIAIJ - Tests whether Bmat equals Amat^T (within tol), setting *f.
  Cheap diagonal-block test first (with an allreduce so all ranks agree), then, if
  that passes on more than one rank, an expensive off-diagonal comparison built
  from MatCreateSubMatrices().
*/
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscBool      lf;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRMPI(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  /* 'notme' = all global rows outside this rank's ownership range [first,last) */
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* A is symmetric iff A^T == A: reuse the transpose test against itself. */
PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* z = y + A^T*x, same structure as MatMultTranspose_MPIAIJ. */
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is
the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  /* with congruent layouts every diagonal entry lives in the local diagonal block */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Scale the whole matrix by aa: both the diagonal and off-diagonal blocks. */
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  MatDestroy_MPIAIJ - Frees everything owned by the MPIAIJ implementation
  (sub-blocks, colmap, ghost arrays, scatter context) and un-composes every
  function/type-conversion hook registered on the object.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
  /* colmap is a hash table or a plain array depending on the build configuration */
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr);
  /* NOTE(review): "MatConvert_mpiaij_is_C" is also cleared above -- this second
     clear is redundant but harmless */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  MatView_MPIAIJ_Binary - Writes the parallel matrix to a binary viewer in the
  PETSc binary format: header, per-row lengths, global column indices, values.
  Column indices of each row are emitted in ascending global order by merging
  the off-diagonal entries left of the diagonal block, the diagonal block, and
  the remaining off-diagonal entries.
*/
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
  const PetscInt    *garray = aij->garray;   /* local off-diagonal column -> global column */
  const PetscScalar *aa,*ba;
  PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
  PetscInt          *rowlens;
  PetscInt          *colidxs;
  PetscScalar       *matvals;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz =
A->nz + B->nz;   /* local nonzero count: diagonal plus off-diagonal block */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M; header[2] = N; header[3] = nz;
  /* rank 0 receives the global nonzero count into header[3] */
  ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
  ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);

  /* fill in and store row lengths */
  ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
  for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscFree(rowlens);CHKERRQ(ierr);

  /* fill in and store column indices */
  ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
  for (cnt=0, i=0; i<m; i++) {
    /* off-diagonal columns left of the diagonal block */
    for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    /* diagonal block (local indices shifted to global) */
    for (ja=A->i[i]; ja<A->i[i+1]; ja++)
      colidxs[cnt++] = A->j[ja] + cs;
    /* remaining off-diagonal columns right of the diagonal block */
    for (; jb<B->i[i+1]; jb++)
      colidxs[cnt++] = garray[B->j[jb]];
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
  ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscFree(colidxs);CHKERRQ(ierr);

  /* fill in and store nonzero values, merged in the same order as the indices */
  ierr = MatSeqAIJGetArrayRead(aij->A,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(aij->B,&ba);CHKERRQ(ierr);
  ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
  for (cnt=0, i=0; i<m; i++) {
    for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      matvals[cnt++] = ba[jb];
    }
    for (ja=A->i[i]; ja<A->i[i+1]; ja++)
      matvals[cnt++] = aa[ja];
    for (; jb<B->i[i+1]; jb++)
      matvals[cnt++] = ba[jb];
  }
  ierr = MatSeqAIJRestoreArrayRead(aij->A,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(aij->B,&ba);CHKERRQ(ierr);
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
  ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
  ierr = PetscFree(matvals);CHKERRQ(ierr);

  /* write block size option to the viewer's .info file */
  ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include <petscdraw.h>
/*
  MatView_MPIAIJ_ASCIIorDraworSocket - Viewer dispatch for ASCII, binary, draw
  and socket viewers. Info-style ASCII formats are reported per rank; all other
  formats gather the entire matrix onto rank 0 and view it sequentially.
*/
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max local nonzero counts across ranks */
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
      ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
      ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
      for (i=0; i<(PetscInt)size; i++) {
        nmax = PetscMax(nmax,nz[i]);
        nmin = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      ierr = PetscFree(nz);CHKERRQ(ierr);
      navg = navg/size;
      ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo  info;
      PetscInt *inodes=NULL;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow,iscol;

    /* rank 0 selects all rows/cols; the other ranks select none */
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
    ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
    ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
/* The commented code uses MatCreateSubMatrices instead */
/*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
    ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
    if (!rank) {
       ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
       A    = AA[0];
       Av   = AA[0];
    }
    ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
*/
    ierr = ISDestroy(&iscol);CHKERRQ(ierr);
    ierr = ISDestroy(&isrow);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      if (((PetscObject)mat)->name) {
        ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
      }
      ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Top-level MatView for MPIAIJ: route supported viewer types to the worker above. */
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
  MatSOR_MPIAIJ - Local (block Jacobi) SOR/Eisenstat relaxation: each outer
  iteration scatters ghost values of xx, forms bb1 = bb - B*x and sweeps the
  local diagonal block. Only the SOR_LOCAL_* variants (and Eisenstat) are
  supported; true parallel SOR is rejected.
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ
*mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = NULL;   /* modified rhs, allocated only when actually needed */
  PetscBool      hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* bb1 is needed unless a single sweep starts from a zero initial guess
     (~flag & SOR_ZERO_INITIAL_GUESS == "flag does not contain ZERO_INITIAL_GUESS") */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      /* cache the diagonal, used when there is no fast diagonal-block multiply */
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(&xx1);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);

  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}

/*
  MatPermute_MPIAIJ - Builds B = P*A*Q for the row permutation rowp and column
  permutation colp. The permutations are inverted with PetscSF reductions to find
  the destination of each owned row/column, destination counts are pre-allocated,
  and the permuted entries are inserted with MatSetValues().
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;
  PetscBool      done;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);

  /* Invert row permutation to find out where my rows should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);

  /* Invert column permutation to find out where my columns should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);

  /* Find out where my gcols should go */
  ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
  ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  /* count diagonal/off-diagonal nonzeros per destination row for preallocation */
  ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* send the counts to the ranks that own the destination rows */
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);

  ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt j0,rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
    rowlen = bi[i+1] - bi[i];
    for (j0=j=0; j<rowlen; j0=j) {
      for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
  ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
  ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
  ierr = PetscFree(gcdest);CHKERRQ(ierr);
  /* NOTE(review): parcolp is never set non-NULL in this function, so this
     destroy is currently dead code -- possibly a leftover from an earlier
     parallel-colp conversion; confirm before removing */
  if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
  *B = Aperm;
  PetscFunctionReturn(0);
}

/* Report the number of ghost (off-process) columns and, optionally, their
   global indices (the garray of the off-diagonal block). */
PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
  if (ghosts) *ghosts = aij->garray;
  PetscFunctionReturn(0);
}

/*
   MatGetInfo_MPIAIJ - Gathers matrix statistics (nonzeros, memory, mallocs) by summing
   the local info of the diagonal (A) and off-diagonal (B) blocks, then combining across
   processes with MPI_MAX or MPI_SUM depending on flag.
*/
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  /* accumulate B's contribution on top of A's */
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/*
   MatSetOption_MPIAIJ - Sets an option on the parallel matrix; most options are simply
   forwarded to both sequential blocks, a few update local bookkeeping flags instead.
*/
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    /* forwarded unchanged to both the diagonal and off-diagonal blocks */
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

/*
   MatGetRow_MPIAIJ - Returns one locally owned row as a merged, column-sorted view of the
   diagonal (A) and off-diagonal (B) blocks. Values/indices are copied into per-matrix
   scratch arrays (rowvalues/rowindices), so only one row may be active at a time.
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = NULL; pvB = NULL;}
  if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* imark marks where B's columns stop being smaller than cstart (the start of A's range) */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v)   *v   = NULL;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatRestoreRow_MPIAIJ - Releases the row obtained with MatGetRow_MPIAIJ(); only clears
   the "active" flag, since the returned arrays are matrix-owned scratch space.
*/
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*
   MatNorm_MPIAIJ - Computes the Frobenius, 1- (max column sum), or infinity- (max row sum)
   norm of the parallel matrix by combining local sums over the A and B blocks with a
   reduction over the matrix communicator. The 2-norm is not supported.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single-process matrix: defer to the sequential implementation */
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
      *norm = PetscSqrtReal(*norm);
      ierr  = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      /* tmp accumulates per-global-column absolute sums; O(N) storage on every process */
      ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

/*
   MatTranspose_MPIAIJ - Forms the transpose of a parallel AIJ matrix. The diagonal block
   is transposed locally; the off-diagonal block is transposed by inserting its entries
   into the destination rows via MatSetValues (communication during assembly).
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ      *a =(Mat_MPIAIJ*)A->data,*b;
  Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
  PetscInt        M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
  const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
  PetscErrorCode  ierr;
  Mat             B,A_diag,*B_diag;
  const MatScalar *pbv,*bv;

  PetscFunctionBegin;
  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
    /* compute
d_nnz for preallocation */
    ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) {
      d_nnz[aj[i]]++;
    }
    /* compute local off-diagonal contributions */
    ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
    ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
    ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

    /* B has A's column layout for rows and row layout for columns (transposed sizes) */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
  } else {
    B    = *matout;
    ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }

  b           = (Mat_MPIAIJ*)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i=0; i<A_diag_ncol; i++) {
    B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
  }

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
     very quickly (=without using MatSetValues), because all writes are local. */
  ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);

  /* copy over the B part */
  ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(a->B,&bv);CHKERRQ(ierr);
  pbv  = bv;
  row  = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    /* row i of B becomes column (rstart+i) of the transpose: insert as a column of values */
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    pbv += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(a->B,&bv);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX: fold the result back into A */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   MatDiagonalScale_MPIAIJ - Computes mat = diag(ll)*mat*diag(rr). The right scaling of the
   off-diagonal block needs ghost values of rr, which are scattered while the local scaling
   proceeds.
*/
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation.
*/
    ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   MatSetUnfactored_MPIAIJ - Resets the factored state; only the diagonal block carries
   factorization state here.
*/
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatEqual_MPIAIJ - Compares both sequential blocks locally, then takes a logical AND
   across all processes so every rank returns the same answer.
*/
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  PetscFunctionReturn(0);
}

/*
   MatCopy_MPIAIJ - Copies A into B. Uses the fast blockwise copy only when the nonzero
   patterns match and both matrices share the same copy implementation; otherwise falls
   back to MatCopy_Basic.
*/
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatSetUp_MPIAIJ - Default setup: preallocate with default parameters.
*/
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Computes the number of nonzeros per row needed for preallocation when X and Y
   have different nonzero structure.
*/
PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
{
  PetscInt i,j,k,nzx,nzy;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix: merge-count the two sorted index lists
     per row, comparing in global (ltog-mapped) column numbering */
  for (i=0; i<m; i++) {
    const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
    nzx    = xi[i+1] - xi[i];
    nzy    = yi[i+1] - yi[i];
    nnz[i] = 0;
    for (j=0,k=0; j<nzx; j++) {                                      /* Point in X */
      for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++;    /* Catch up to X */
      if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;                /* Skip duplicate */
      nnz[i]++;
    }
    for (; k<nzy; k++) nnz[i]++;
  }
  PetscFunctionReturn(0);
}

/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
{
  PetscErrorCode ierr;
  PetscInt       m = Y->rmap->N;
  Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
  Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;

  PetscFunctionBegin;
  ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatAXPY_MPIAIJ - Computes Y = a*X + Y. With matching (or subset) nonzero patterns the
   blockwise/basic paths are used; otherwise a new matrix with merged preallocation is
   built and swapped into Y via MatHeaderReplace.
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    ierr = MatAXPY(yy->A,a,xx->A,str);CHKERRQ(ierr);
    ierr = MatAXPY(yy->B,a,xx->B,str);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o;

    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr =
PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2108 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2109 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2110 ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr); 2111 ierr = MatSetType(B,((PetscObject)Y)->type_name);CHKERRQ(ierr); 2112 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2113 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2114 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2115 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2116 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2117 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2118 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2119 } 2120 PetscFunctionReturn(0); 2121 } 2122 2123 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2124 2125 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2126 { 2127 #if defined(PETSC_USE_COMPLEX) 2128 PetscErrorCode ierr; 2129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2130 2131 PetscFunctionBegin; 2132 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2133 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2134 #else 2135 PetscFunctionBegin; 2136 #endif 2137 PetscFunctionReturn(0); 2138 } 2139 2140 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2141 { 2142 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2143 PetscErrorCode ierr; 2144 2145 PetscFunctionBegin; 2146 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2147 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2148 PetscFunctionReturn(0); 2149 } 2150 2151 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2152 { 2153 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2154 PetscErrorCode ierr; 2155 2156 PetscFunctionBegin; 2157 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2158 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2159 PetscFunctionReturn(0); 2160 } 2161 2162 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2163 { 2164 Mat_MPIAIJ *a = 
(Mat_MPIAIJ*)A->data; 2165 PetscErrorCode ierr; 2166 PetscInt i,*idxb = NULL,m = A->rmap->n; 2167 PetscScalar *va,*vv; 2168 Vec vB,vA; 2169 const PetscScalar *vb; 2170 2171 PetscFunctionBegin; 2172 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr); 2173 ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr); 2174 2175 ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr); 2176 if (idx) { 2177 for (i=0; i<m; i++) { 2178 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2179 } 2180 } 2181 2182 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr); 2183 ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr); 2184 ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr); 2185 2186 ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr); 2187 ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr); 2188 for (i=0; i<m; i++) { 2189 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2190 vv[i] = vb[i]; 2191 if (idx) idx[i] = a->garray[idxb[i]]; 2192 } else { 2193 vv[i] = va[i]; 2194 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) 2195 idx[i] = a->garray[idxb[i]]; 2196 } 2197 } 2198 ierr = VecRestoreArrayWrite(vA,&vv);CHKERRQ(ierr); 2199 ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr); 2200 ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr); 2201 ierr = PetscFree(idxb);CHKERRQ(ierr); 2202 ierr = VecDestroy(&vA);CHKERRQ(ierr); 2203 ierr = VecDestroy(&vB);CHKERRQ(ierr); 2204 PetscFunctionReturn(0); 2205 } 2206 2207 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2208 { 2209 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2210 PetscInt m = A->rmap->n,n = A->cmap->n; 2211 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2212 PetscInt *cmap = mat->garray; 2213 PetscInt *diagIdx, *offdiagIdx; 2214 Vec diagV, offdiagV; 2215 PetscScalar *a, *diagA, *offdiagA; 2216 const PetscScalar *ba,*bav; 2217 PetscInt r,j,col,ncols,*bi,*bj; 2218 PetscErrorCode ierr; 2219 Mat B = mat->B; 2220 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2221 2222 
PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
    ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr);
    ierr = VecDestroy(&diagV);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* this process owns no columns: every stored entry is off-process, row min abs is 0 */
    if (m) {
      ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
      for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
      ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr);

  /* Get offdiagIdx[] for implicit 0.0 */
  ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so there is at least one implicit 0.0: the min abs cannot exceed 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    for (j=0; j<ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* Merge diagonal-block and off-diagonal-block results; ties go to the smaller global column */
  ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatGetRowMin_MPIAIJ - For each local row, finds the minimum entry (compared by real
   part), treating columns not stored in the compressed off-diagonal block as implicit
   zeros. Same structure as MatGetRowMinAbs_MPIAIJ but with PetscRealPart comparisons.
*/
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  PetscErrorCode    ierr;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
    ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr);
    ierr = VecDestroy(&diagV);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (n == 0) {
    if (m) {
      ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
      for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
      ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);

  /* Get offdiagIdx[] for implicit 0.0 */
  ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so there is at least one implicit 0.0: the row min cannot exceed 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  PetscErrorCode    ierr;
  Mat               B =
mat->B; 2438 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2439 2440 PetscFunctionBegin; 2441 /* When a process holds entire A and other processes have no entry */ 2442 if (A->cmap->N == n) { 2443 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2444 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2445 ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr); 2446 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2447 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2448 PetscFunctionReturn(0); 2449 } else if (n == 0) { 2450 if (m) { 2451 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2452 for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;} 2453 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2454 } 2455 PetscFunctionReturn(0); 2456 } 2457 2458 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2459 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2460 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2461 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2462 2463 /* Get offdiagIdx[] for implicit 0.0 */ 2464 ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr); 2465 ba = bav; 2466 bi = b->i; 2467 bj = b->j; 2468 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2469 for (r = 0; r < m; r++) { 2470 ncols = bi[r+1] - bi[r]; 2471 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2472 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2473 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2474 offdiagA[r] = 0.0; 2475 2476 /* Find first hole in the cmap */ 2477 for (j=0; j<ncols; j++) { 2478 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2479 if (col > j && j < cstart) { 2480 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2481 break; 2482 } else if (col > j + n && j >= cstart) { 2483 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2484 break; 2485 } 2486 } 2487 if (j == ncols && ncols < A->cmap->N - n) 
{ 2488 /* a hole is outside compressed Bcols */ 2489 if (ncols == 0) { 2490 if (cstart) { 2491 offdiagIdx[r] = 0; 2492 } else offdiagIdx[r] = cend; 2493 } else { /* ncols > 0 */ 2494 offdiagIdx[r] = cmap[ncols-1] + 1; 2495 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2496 } 2497 } 2498 } 2499 2500 for (j=0; j<ncols; j++) { 2501 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2502 ba++; bj++; 2503 } 2504 } 2505 2506 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2507 ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr); 2508 for (r = 0; r < m; ++r) { 2509 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2510 a[r] = diagA[r]; 2511 if (idx) idx[r] = cstart + diagIdx[r]; 2512 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2513 a[r] = diagA[r]; 2514 if (idx) { 2515 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2516 idx[r] = cstart + diagIdx[r]; 2517 } else idx[r] = offdiagIdx[r]; 2518 } 2519 } else { 2520 a[r] = offdiagA[r]; 2521 if (idx) idx[r] = offdiagIdx[r]; 2522 } 2523 } 2524 ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr); 2525 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2526 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2527 ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr); 2528 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2529 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2530 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2531 PetscFunctionReturn(0); 2532 } 2533 2534 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2535 { 2536 PetscErrorCode ierr; 2537 Mat *dummy; 2538 2539 PetscFunctionBegin; 2540 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2541 *newmat = *dummy; 2542 ierr = PetscFree(dummy);CHKERRQ(ierr); 2543 PetscFunctionReturn(0); 2544 } 2545 2546 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar 
**values)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* delegate to the sequential diagonal block and propagate any factorization error state */
  ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
  A->factorerrortype = a->A->factorerrortype;
  PetscFunctionReturn(0);
}

/* Fill the matrix with random numbers; the matrix must be preallocated (or already
   assembled) so that the pattern to fill is known. */
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;

  PetscFunctionBegin;
  if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
  ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
  if (x->assembled) {
    ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
  } else {
    /* before first assembly, avoid generating entries of B in the locally owned column range */
    ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
{
  PetscFunctionBegin;
  /* simply swap the function pointer consulted by MatIncreaseOverlap() */
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(0);
}

/*@
   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)

   Level: advanced

@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* PetscTryMethod: a no-op for matrix types that do not provide the method */
  ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
{
  PetscErrorCode ierr;
  PetscBool      sc = PETSC_FALSE,flg;

  PetscFunctionBegin;
  ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
  /* present the currently installed overlap algorithm as the option's default */
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
  }
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Y = Y + a*I; ensure the diagonal part has room for diagonal entries before
   delegating to the generic MatShift_Basic(). */
PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
  Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    /* not preallocated yet: one nonzero per row (the diagonal) suffices */
    ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
  } else if (!aij->nz) {
    /* diagonal block has no stored entries: re-preallocate it, preserving its nonew option */
    PetscInt nonew = aij->nonew;
    ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
    aij->nonew = nonew;
  }
  ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Report whether any locally owned row lacks an explicit diagonal entry.  The
   diagonal entries live in the local diagonal block a->A, so delegate to it and
   shift the reported row index from local to global numbering. */
PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
  ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
  if (d) {
    PetscInt rstart;
    ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
    *d += rstart; /* convert the local row index returned by the seq kernel to a global index */

  }
  PetscFunctionReturn(0);
}

/* Variable-size blocks on the diagonal also lie entirely within the local diagonal part. */
PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
{
  Mat_MPIAIJ *a =
(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Operation table for MATMPIAIJ.  Entries are positional (the numeric comments give
   the slot index within struct _MatOps); NULL marks an operation this type does not
   implement.  Do not reorder entries. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*29*/ MatSetUp_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ NULL,
                                       NULL,
                                       NULL,
                                /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*109*/NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*119*/MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnNorms_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                /*129*/NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                /*134*/NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*139*/MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                /*145*/NULL,
                                       NULL,
                                       NULL
};

/* ----------------------------------------------------------------------------------------*/

/* Stash a copy of the current numerical values of both local blocks so they can
   later be restored with MatRetrieveValues_MPIAIJ(). */
PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Restore into both local blocks the values previously saved by MatStoreValues_MPIAIJ(). */
PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Preallocation for MATMPIAIJ: d_nz/d_nnz describe the diagonal block (columns owned
   by this process), o_nz/o_nnz the off-diagonal block.  The off-diagonal block B and
   its communication structures are rebuilt from scratch since its size may change. */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b = (Mat_MPIAIJ*)B->data;

  /* the column map, garray, local work vector and scatter all refer to the old
     off-diagonal block; discard them so they are rebuilt at the next assembly */
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
  ierr = MatDestroy(&b->B);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
  /* a uniprocessor run has no off-diagonal entries at all, hence zero columns for B */
  ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);

  if (!B->preallocated) {
    /* first preallocation: the diagonal block does not exist yet */
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Reset the preallocation of both local blocks and drop the off-process
   communication data (colmap, garray, lvec, Mvctx) so it is rebuilt at the next
   assembly; the matrix is left preallocated but unassembled. */
PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b = (Mat_MPIAIJ*)B->data;

#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
  ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Duplicate a MATMPIAIJ: deep-copies layouts, column maps and both local blocks;
   per-call work space (rowindices/rowvalues) is deliberately not copied. */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode
ierr;

  PetscFunctionBegin;
  *newmat = NULL;
  ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  a = (Mat_MPIAIJ*)mat->data;

  /* copy scalar state from the original matrix */
  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL; /* MatGetRow() work space is per-matrix; do not share it */
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  /* deep-copy the global-to-local column map of the off-diagonal block, if built */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
#endif
  } else a->colmap = NULL;
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) {
    ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  }
  if (oldmat->Mvctx) {
    ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
  }
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Load a MATMPIAIJ from a viewer; dispatches on the viewer type (binary or HDF5). */
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscBool      isbinary, ishdf5;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr);
  if (isbinary) {
    ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
  } else if (ishdf5) {
#if defined(PETSC_HAVE_HDF5)
    ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  } else {
    SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  }
  PetscFunctionReturn(0);
}

/* Load a MATMPIAIJ from a PETSc binary viewer: read the global header, fix up the
   layouts, then read this process's share of row lengths, column indices and values
   and hand them to MatMPIAIJSetPreallocationCSR(). */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
  PetscInt       *rowidxs,*colidxs;
  PetscScalar    *matvals;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);

  /* read in matrix header */
  ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
  if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
  M = header[1]; N = header[2]; nz = header[3];
  if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
  if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
  if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);

  /* check if the matrix sizes are correct */
  ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
  if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);

  /* read in row lengths and build row indices */
  ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
  ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
  rowidxs[0] = 0;
for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; /* prefix-sum the row lengths into CSR row offsets */
  ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr);
  if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
  /* read in column indices and matrix values */
  ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
  /* store matrix indices and values */
  ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
  ierr = PetscFree(rowidxs);CHKERRQ(ierr);
  ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Not scalable because of ISAllGather() unless getting all columns. */
PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscBool      isstride;
  PetscMPIInt    lisstride=0,gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);

  if (isstride) {
    PetscInt start,len,mstart,mlen;
    ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
    ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
    ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
    /* this process asks for exactly its own column ownership range */
    if (mstart == start && mlen-mstart == len) lisstride = 1;
  }

  /* MPI_MIN: gisstride is 1 only if EVERY process asked for its own column range */
  ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
  if (gisstride) {
    PetscInt N;
    ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
    /* the union of all requested columns is 0..N-1, so the gathered IS is the identity */
    ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
    ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
    ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
  } else {
    PetscInt cbs;
    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
    ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
  }

  *isseq = iscol_local;
  PetscFunctionReturn(0);
}

/*
 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
 (see MatCreateSubMatrix_MPIAIJ_nonscalable)

 Input Parameters:
   mat - matrix
   isrow - parallel row index set; its local indices are a subset of local columns of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
   iscol - parallel column index set; its local indices are a subset of local columns of mat,
           i.e., mat->cstart <= iscol[i] < mat->cend
 Output Parameter:
   isrow_d,iscol_d - sequential row and column
index sets for retrieving mat->A
   iscol_o - sequential column index set for retrieving mat->B
   garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
 */
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
{
  PetscErrorCode ierr;
  Vec            x,cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray,*cmaparray;
  PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            B=a->B;
  Vec            lvec=a->lvec,lcmap;
  PetscInt       i,cstart,cend,Bn=B->cmap->N;
  MPI_Comm       comm;
  VecScatter     Mvctx=a->Mvctx;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
  ierr = VecSet(x,-1.0);CHKERRQ(ierr);
  ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
  ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);

  /* Get start indices */
  ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
  isstart -= ncols; /* exclusive prefix sum: global offset of this process's iscol entries */
  ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);

  ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
  ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
  ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
  ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
  for (i=0; i<ncols; i++) {
    xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
    idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
  }
  ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
  ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);

  /* Get iscol_d */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
  ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
  ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);

  /* Get isrow_d */
  ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
  rstart = mat->rmap->rstart;
  ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
  for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; /* global -> local row numbering */
  ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);

  ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
  ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
  ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);

  ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
  ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);

  ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
  ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
  for (i=0; i<Bn; i++) {
    /* entries that kept the -1.0 padding were not selected by iscol on any process */
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
  ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);

  ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
  /* cannot ensure iscol_o has same blocksize as iscol! */

  ierr = PetscFree(idx);CHKERRQ(ierr);
  *garray = cmap1; /* ownership of cmap1 passes to the caller */

  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&cmap);CHKERRQ(ierr);
  ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
  Mat            M = NULL;
  MPI_Comm       comm;
  IS             iscol_d,isrow_d,iscol_o;
  Mat            Asub = NULL,Bsub = NULL;
  PetscInt       n;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
    if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");

    ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
    if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");

    ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
    if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ*)(*submat)->data;
    ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
    ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
    if (n) {
3235 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3236 } 3237 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3238 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3239 3240 } else { /* call == MAT_INITIAL_MATRIX) */ 3241 const PetscInt *garray; 3242 PetscInt BsubN; 3243 3244 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */ 3245 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3246 3247 /* Create local submatrices Asub and Bsub */ 3248 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3249 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3250 3251 /* Create submatrix M */ 3252 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3253 3254 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3255 asub = (Mat_MPIAIJ*)M->data; 3256 3257 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3258 n = asub->B->cmap->N; 3259 if (BsubN > n) { 3260 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3261 const PetscInt *idx; 3262 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3263 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3264 3265 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3266 j = 0; 3267 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3268 for (i=0; i<n; i++) { 3269 if (j >= BsubN) break; 3270 while (subgarray[i] > garray[j]) j++; 3271 3272 if (subgarray[i] == garray[j]) { 3273 idx_new[i] = idx[j++]; 3274 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3275 } 3276 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3277 3278 
ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3279 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3280 3281 } else if (BsubN < n) { 3282 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3283 } 3284 3285 ierr = PetscFree(garray);CHKERRQ(ierr); 3286 *submat = M; 3287 3288 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3289 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3290 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3291 3292 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3293 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3294 3295 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3296 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3297 } 3298 PetscFunctionReturn(0); 3299 } 3300 3301 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3302 { 3303 PetscErrorCode ierr; 3304 IS iscol_local=NULL,isrow_d; 3305 PetscInt csize; 3306 PetscInt n,i,j,start,end; 3307 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3308 MPI_Comm comm; 3309 3310 PetscFunctionBegin; 3311 /* If isrow has same processor distribution as mat, 3312 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3313 if (call == MAT_REUSE_MATRIX) { 3314 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3315 if (isrow_d) { 3316 sameRowDist = PETSC_TRUE; 3317 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3318 } else { 3319 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3320 if (iscol_local) { 3321 sameRowDist = PETSC_TRUE; 3322 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3323 } 3324 } 3325 } else { 3326 /* Check if isrow has same processor distribution as mat */ 3327 
sameDist[0] = PETSC_FALSE; 3328 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3329 if (!n) { 3330 sameDist[0] = PETSC_TRUE; 3331 } else { 3332 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3333 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3334 if (i >= start && j < end) { 3335 sameDist[0] = PETSC_TRUE; 3336 } 3337 } 3338 3339 /* Check if iscol has same processor distribution as mat */ 3340 sameDist[1] = PETSC_FALSE; 3341 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3342 if (!n) { 3343 sameDist[1] = PETSC_TRUE; 3344 } else { 3345 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3346 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3347 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3348 } 3349 3350 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3351 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRMPI(ierr); 3352 sameRowDist = tsameDist[0]; 3353 } 3354 3355 if (sameRowDist) { 3356 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3357 /* isrow and iscol have same processor distribution as mat */ 3358 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3359 PetscFunctionReturn(0); 3360 } else { /* sameRowDist */ 3361 /* isrow has same processor distribution as mat */ 3362 if (call == MAT_INITIAL_MATRIX) { 3363 PetscBool sorted; 3364 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3365 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3366 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3367 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3368 3369 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3370 if (sorted) { 3371 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3372 ierr = 
MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3373 PetscFunctionReturn(0); 3374 } 3375 } else { /* call == MAT_REUSE_MATRIX */ 3376 IS iscol_sub; 3377 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3378 if (iscol_sub) { 3379 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3380 PetscFunctionReturn(0); 3381 } 3382 } 3383 } 3384 } 3385 3386 /* General case: iscol -> iscol_local which has global size of iscol */ 3387 if (call == MAT_REUSE_MATRIX) { 3388 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3389 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3390 } else { 3391 if (!iscol_local) { 3392 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3393 } 3394 } 3395 3396 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3397 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3398 3399 if (call == MAT_INITIAL_MATRIX) { 3400 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3401 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3402 } 3403 PetscFunctionReturn(0); 3404 } 3405 3406 /*@C 3407 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3408 and "off-diagonal" part of the matrix in CSR format. 3409 3410 Collective 3411 3412 Input Parameters: 3413 + comm - MPI communicator 3414 . A - "diagonal" portion of matrix 3415 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3416 - garray - global index of B columns 3417 3418 Output Parameter: 3419 . 
mat - the matrix, with input A as its local diagonal matrix 3420 Level: advanced 3421 3422 Notes: 3423 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3424 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3425 3426 .seealso: MatCreateMPIAIJWithSplitArrays() 3427 @*/ 3428 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3429 { 3430 PetscErrorCode ierr; 3431 Mat_MPIAIJ *maij; 3432 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3433 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3434 const PetscScalar *oa; 3435 Mat Bnew; 3436 PetscInt m,n,N; 3437 3438 PetscFunctionBegin; 3439 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3440 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3441 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3442 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3443 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3444 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3445 3446 /* Get global columns of mat */ 3447 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3448 3449 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3450 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3451 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3452 maij = (Mat_MPIAIJ*)(*mat)->data; 3453 3454 (*mat)->preallocated = PETSC_TRUE; 3455 3456 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3457 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3458 3459 /* Set A as diagonal portion of *mat */ 3460 maij->A = A; 3461 3462 nz = oi[m]; 3463 for (i=0; i<nz; i++) { 3464 col = oj[i]; 3465 
oj[i] = garray[col]; 3466 } 3467 3468 /* Set Bnew as off-diagonal portion of *mat */ 3469 ierr = MatSeqAIJGetArrayRead(B,&oa);CHKERRQ(ierr); 3470 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew);CHKERRQ(ierr); 3471 ierr = MatSeqAIJRestoreArrayRead(B,&oa);CHKERRQ(ierr); 3472 bnew = (Mat_SeqAIJ*)Bnew->data; 3473 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3474 maij->B = Bnew; 3475 3476 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3477 3478 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3479 b->free_a = PETSC_FALSE; 3480 b->free_ij = PETSC_FALSE; 3481 ierr = MatDestroy(&B);CHKERRQ(ierr); 3482 3483 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3484 bnew->free_a = PETSC_TRUE; 3485 bnew->free_ij = PETSC_TRUE; 3486 3487 /* condense columns of maij->B */ 3488 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3489 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3490 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3491 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3492 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3493 PetscFunctionReturn(0); 3494 } 3495 3496 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3497 3498 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3499 { 3500 PetscErrorCode ierr; 3501 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3502 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3503 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3504 Mat M,Msub,B=a->B; 3505 MatScalar *aa; 3506 Mat_SeqAIJ *aij; 3507 PetscInt *garray = a->garray,*colsub,Ncols; 3508 PetscInt 
count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
  IS             iscol_sub,iscmap;
  const PetscInt *is_idx,*cmap;
  PetscBool      allcolumns=PETSC_FALSE;
  MPI_Comm       comm;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    /* Recover the objects composed on *newmat by the MAT_INITIAL_MATRIX call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
    if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
    ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);

    ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
    if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");

    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
    if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");

    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
    ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
    if (allcolumns) {
      iscol_sub = iscol_local;
      ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx,*cmap1,k;
      ierr  = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
      ierr  = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
      ierr  = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
      count = 0;
      k     = 0;
      for (i=0; i<Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            /* advance k through garray until garray[k] >= j; relies on garray being sorted ascending */
            while (j > garray[k] && k < Bn-1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);

      ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
      ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
      ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);

      ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
    }

    /* (3) Create sequential Msub */
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
  }

  ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Msub)->data;
  ii   = aij->i;
  ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank,size;
    PetscInt    csize;

    ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
    ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols/size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);

    /* next, compute all the lengths */
    jj    = aij->j;
    ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);

    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);

  } else { /* call == MAT_REUSE_MATRIX */
    M    = *newmat;
    ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
    if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);

  jj   = aij->j;
  ierr = MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; /* translate Msub's local columns to newmat's global columns */
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
    jj += nz; aa += nz;
  }
  ierr = MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(colsub);CHKERRQ(ierr);

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
    ierr = MatDestroy(&Msub);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
    ierr = ISDestroy(&iscmap);CHKERRQ(ierr);

    if (iscol_local) {
      ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
      ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

    Note: This requires a sequential iscol with all indices.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;
  MatScalar      *aa,*vwork;
  MPI_Comm       comm;
  Mat_SeqAIJ     *aij;
  PetscBool      colflag,allcolumns=PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);

  /* Check for special case: each processor gets entire matrix columns; must be agreed on by all ranks */
  ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);

  if (call == MAT_REUSE_MATRIX) {
    /* Recover the sequential submatrix composed on *newmat by the MAT_INITIAL_MATRIX call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;

  /* trigger copy to CPU if needed */
  ierr = MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart, cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscErrorCode ierr;
  PetscBool      nooffprocentries;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);

  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr =
PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3860 3861 if (PetscDefined(USE_DEBUG)) { 3862 for (i=0; i<m; i++) { 3863 nnz = Ii[i+1]- Ii[i]; 3864 JJ = J + Ii[i]; 3865 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3866 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3867 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3868 } 3869 } 3870 3871 for (i=0; i<m; i++) { 3872 nnz = Ii[i+1]- Ii[i]; 3873 JJ = J + Ii[i]; 3874 nnz_max = PetscMax(nnz_max,nnz); 3875 d = 0; 3876 for (j=0; j<nnz; j++) { 3877 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3878 } 3879 d_nnz[i] = d; 3880 o_nnz[i] = nnz - d; 3881 } 3882 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3883 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3884 3885 for (i=0; i<m; i++) { 3886 ii = i + rstart; 3887 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 3888 } 3889 nooffprocentries = B->nooffprocentries; 3890 B->nooffprocentries = PETSC_TRUE; 3891 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3892 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3893 B->nooffprocentries = nooffprocentries; 3894 3895 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3896 PetscFunctionReturn(0); 3897 } 3898 3899 /*@ 3900 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3901 (the default parallel PETSc format). 3902 3903 Collective 3904 3905 Input Parameters: 3906 + B - the matrix 3907 . i - the indices into j for the start of each local row (starts with zero) 3908 . 
j - the column indices for each local row (starts with zero) 3909 - v - optional values in the matrix 3910 3911 Level: developer 3912 3913 Notes: 3914 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3915 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3916 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3917 3918 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3919 3920 The format which is used for the sparse matrix input, is equivalent to a 3921 row-major ordering.. i.e for the following matrix, the input data expected is 3922 as shown 3923 3924 $ 1 0 0 3925 $ 2 0 3 P0 3926 $ ------- 3927 $ 4 5 6 P1 3928 $ 3929 $ Process0 [P0]: rows_owned=[0,1] 3930 $ i = {0,1,3} [size = nrow+1 = 2+1] 3931 $ j = {0,0,2} [size = 3] 3932 $ v = {1,2,3} [size = 3] 3933 $ 3934 $ Process1 [P1]: rows_owned=[2] 3935 $ i = {0,3} [size = nrow+1 = 1+1] 3936 $ j = {0,1,2} [size = 3] 3937 $ v = {4,5,6} [size = 3] 3938 3939 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3940 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3941 @*/ 3942 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3943 { 3944 PetscErrorCode ierr; 3945 3946 PetscFunctionBegin; 3947 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3948 PetscFunctionReturn(0); 3949 } 3950 3951 /*@C 3952 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3953 (the default parallel PETSc format). For good matrix assembly performance 3954 the user should preallocate the matrix storage by setting the parameters 3955 d_nz (or d_nnz) and o_nz (or o_nnz). 
   By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  B - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)), is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extraction the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   the this processor, and c1-c2 is range of indices of the local part of a
   vector suitable for applying the matrix to. This is an mxn matrix. In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitute the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
   CSR format for the local rows.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

       The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering.. i.e for the following matrix, the input data expected is
    as shown

       Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard
   CSR format for the local rows.
Only the numerical values are updated the other arrays must be identical 4163 4164 Collective 4165 4166 Input Parameters: 4167 + mat - the matrix 4168 . m - number of local rows (Cannot be PETSC_DECIDE) 4169 . n - This value should be the same as the local size used in creating the 4170 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4171 calculated if N is given) For square matrices n is almost always m. 4172 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4173 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4174 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4175 . J - column indices 4176 - v - matrix values 4177 4178 Level: intermediate 4179 4180 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4181 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4182 @*/ 4183 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4184 { 4185 PetscErrorCode ierr; 4186 PetscInt cstart,nnz,i,j; 4187 PetscInt *ld; 4188 PetscBool nooffprocentries; 4189 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4190 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data; 4191 PetscScalar *ad = Ad->a, *ao = Ao->a; 4192 const PetscInt *Adi = Ad->i; 4193 PetscInt ldi,Iii,md; 4194 4195 PetscFunctionBegin; 4196 if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4197 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4198 if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4199 if (n != mat->cmap->n) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4200 4201 cstart = mat->cmap->rstart; 4202 if (!Aij->ld) { 4203 /* count number of entries below block diagonal */ 4204 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 4205 Aij->ld = ld; 4206 for (i=0; i<m; i++) { 4207 nnz = Ii[i+1]- Ii[i]; 4208 j = 0; 4209 while (J[j] < cstart && j < nnz) {j++;} 4210 J += nnz; 4211 ld[i] = j; 4212 } 4213 } else { 4214 ld = Aij->ld; 4215 } 4216 4217 for (i=0; i<m; i++) { 4218 nnz = Ii[i+1]- Ii[i]; 4219 Iii = Ii[i]; 4220 ldi = ld[i]; 4221 md = Adi[i+1]-Adi[i]; 4222 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr); 4223 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr); 4224 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr); 4225 ad += md; 4226 ao += nnz - md; 4227 } 4228 nooffprocentries = mat->nooffprocentries; 4229 mat->nooffprocentries = PETSC_TRUE; 4230 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr); 4231 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr); 4232 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 4233 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4234 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4235 mat->nooffprocentries = nooffprocentries; 4236 PetscFunctionReturn(0); 4237 } 4238 4239 /*@C 4240 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4241 (the default parallel PETSc format). For good matrix assembly performance 4242 the user should preallocate the matrix storage by setting the parameters 4243 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4244 performance can be increased by more than a factor of 50. 4245 4246 Collective 4247 4248 Input Parameters: 4249 + comm - MPI communicator 4250 . 
m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4251 This value should be the same as the local size used in creating the 4252 y vector for the matrix-vector product y = Ax. 4253 . n - This value should be the same as the local size used in creating the 4254 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4255 calculated if N is given) For square matrices n is almost always m. 4256 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4257 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4258 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4259 (same value is used for all local rows) 4260 . d_nnz - array containing the number of nonzeros in the various rows of the 4261 DIAGONAL portion of the local submatrix (possibly different for each row) 4262 or NULL, if d_nz is used to specify the nonzero structure. 4263 The size of this array is equal to the number of local rows, i.e 'm'. 4264 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4265 submatrix (same value is used for all local rows). 4266 - o_nnz - array containing the number of nonzeros in the various rows of the 4267 OFF-DIAGONAL portion of the local submatrix (possibly different for 4268 each row) or NULL, if o_nz is used to specify the nonzero 4269 structure. The size of this array is equal to the number 4270 of local rows, i.e 'm'. 4271 4272 Output Parameter: 4273 . A - the matrix 4274 4275 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4276 MatXXXXSetPreallocation() paradigm instead of this routine directly. 
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2 etc.. where
   m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
   values corresponding to [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to 0th partition, the next n1 columns belonging to the next
   partition etc.. where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor, i.e. the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1]
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4317 4318 When calling this routine with a single process communicator, a matrix of 4319 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4320 type of communicator, use the construction mechanism 4321 .vb 4322 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4323 .ve 4324 4325 $ MatCreate(...,&A); 4326 $ MatSetType(A,MATMPIAIJ); 4327 $ MatSetSizes(A, m,n,M,N); 4328 $ MatMPIAIJSetPreallocation(A,...); 4329 4330 By default, this format uses inodes (identical nodes) when possible. 4331 We search for consecutive rows with the same nonzero structure, thereby 4332 reusing matrix information to achieve increased efficiency. 4333 4334 Options Database Keys: 4335 + -mat_no_inode - Do not use inodes 4336 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4337 4338 Example usage: 4339 4340 Consider the following 8x8 matrix with 34 non-zero values, that is 4341 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4342 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4343 as follows 4344 4345 .vb 4346 1 2 0 | 0 3 0 | 0 4 4347 Proc0 0 5 6 | 7 0 0 | 8 0 4348 9 0 10 | 11 0 0 | 12 0 4349 ------------------------------------- 4350 13 0 14 | 15 16 17 | 0 0 4351 Proc1 0 18 0 | 19 20 21 | 0 0 4352 0 0 0 | 22 23 0 | 24 0 4353 ------------------------------------- 4354 Proc2 25 26 27 | 0 0 28 | 29 0 4355 30 0 0 | 31 32 33 | 0 34 4356 .ve 4357 4358 This can be represented as a collection of submatrices as 4359 4360 .vb 4361 A B C 4362 D E F 4363 G H I 4364 .ve 4365 4366 Where the submatrices A,B,C are owned by proc0, D,E,F are 4367 owned by proc1, G,H,I are owned by proc2. 4368 4369 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4370 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4371 The 'M','N' parameters are 8,8, and have the same values on all procs. 
   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence pre-allocation is perfect.
4406 4407 Level: intermediate 4408 4409 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4410 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4411 @*/ 4412 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4413 { 4414 PetscErrorCode ierr; 4415 PetscMPIInt size; 4416 4417 PetscFunctionBegin; 4418 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4419 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4420 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4421 if (size > 1) { 4422 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4423 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4424 } else { 4425 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4426 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4427 } 4428 PetscFunctionReturn(0); 4429 } 4430 4431 /*@C 4432 MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix 4433 4434 Not collective 4435 4436 Input Parameter: 4437 . A - The MPIAIJ matrix 4438 4439 Output Parameters: 4440 + Ad - The local diagonal block as a SeqAIJ matrix 4441 . Ao - The local off-diagonal block as a SeqAIJ matrix 4442 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4443 4444 Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4445 in Ad are in [0, Nc) where Nc is the number of local columns. The columns are Ao are in [0, Nco), where Nco is 4446 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4447 local column numbers to global column numbers in the original matrix. 
   Level: intermediate

.seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
@*/
PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* prefix match so that subclasses whose type name begins with "mpiaij" are accepted too */
  ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
  /* each output is optional; pass NULL to skip it */
  if (Ad) *Ad = a->A;
  if (Ao) *Ao = a->B;
  if (colmap) *colmap = a->garray;
  PetscFunctionReturn(0);
}

/* Stacks each process's sequential matrix inmat (all with the same global column size) on top of
   each other, row-block by rank order, to form the parallel matrix *outmat; n is the local column
   size of *outmat (or PETSC_DECIDE). With MAT_INITIAL_MATRIX the symbolic structure is built first;
   with reuse only the numerical values are re-inserted. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz,*onz,sum,bs,cbs;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    /* exclusive prefix sum of local row counts gives this process's first global row */
    ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
    rstart -= m;

    /* NOTE: MatPreallocateInitialize/Finalize are macros that declare and free dnz/onz */
    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
    }

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
    /* MATAIJ resolves to SEQAIJ or MPIAIJ depending on comm size; set both preallocations,
       the one that does not match the actual type is a no-op */
    ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
    ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  }

  /* numeric phase: insert this process's rows, shifted to the global row numbering */
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Writes each process's local rows of A as a separate SeqAIJ matrix, appended to the
   binary file named <outfile>.<rank> */
PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
  ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
  /* Should this be the type of the diagonal block of A?
   */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  /* copy the locally owned rows of A (global row i+rstart) into local row i of B */
  for (i=0; i<m; i++) {
    ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr);
  ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  /* len+6: ".", rank digits and NUL -- NOTE(review): assumes <= 4 rank digits, confirm for very large runs */
  ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr);
  ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
  ierr = MatView(B,out);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Destructor for the Mat_Merge_SeqsToMPI context created by MatCreateMPIAIJSumSeqAIJSymbolic();
   frees every buffer owned by the merge structure and the structure itself */
static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
{
  PetscErrorCode      ierr;
  Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;

  PetscFunctionBegin;
  if (!merge) PetscFunctionReturn(0);
  ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
  ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
  ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
  ierr = PetscFree(merge->bi);CHKERRQ(ierr);
  ierr = PetscFree(merge->bj);CHKERRQ(ierr);
  /* buf_ri/buf_rj were allocated as one contiguous slab anchored at element 0 */
  ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
  ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
  ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
  ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
  ierr = PetscFree(merge->coi);CHKERRQ(ierr);
  ierr = PetscFree(merge->coj);CHKERRQ(ierr);
  ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscFree(merge);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include <../src/mat/utils/freespace.h>
#include <petscbt.h>

/* Numeric phase of merging per-process SeqAIJ matrices into one MPIAIJ matrix:
   exchanges the matrix values matching the communication pattern set up by
   MatCreateMPIAIJSumSeqAIJSymbolic() (stored in the "MatMergeSeqsToMPI" container)
   and accumulates local plus received values into mpimat */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm;
  Mat_SeqAIJ          *a =(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa=a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);

  /* the communication pattern was attached to mpimat by the symbolic phase */
  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr =
PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* rows owned by [proc] are contiguous in seqmat, so one send per destination suffices */
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRMPI(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRMPI(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRMPI(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i; /* global row index */
    bj_i = bj+bi[i];         /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba;
       both bj_i and aj are sorted, so a single merge pass matches columns */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Symbolic phase of merging per-process SeqAIJ matrices into one MPIAIJ matrix:
   determines row ownership, exchanges the ij-structure, and builds the merged
   nonzero pattern plus the communication context reused by the numeric phase */
PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode      ierr;
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz,bs,cbs;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=NULL,current_space=NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm */
  ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
  ierr =
MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4732 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 4733 4734 ierr = PetscNew(&merge);CHKERRQ(ierr); 4735 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4736 4737 /* determine row ownership */ 4738 /*---------------------------------------------------------*/ 4739 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4740 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4741 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4742 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4743 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4744 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4745 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4746 4747 m = merge->rowmap->n; 4748 owners = merge->rowmap->range; 4749 4750 /* determine the number of messages to send, their lengths */ 4751 /*---------------------------------------------------------*/ 4752 len_s = merge->len_s; 4753 4754 len = 0; /* length of buf_si[] */ 4755 merge->nsend = 0; 4756 for (proc=0; proc<size; proc++) { 4757 len_si[proc] = 0; 4758 if (proc == rank) { 4759 len_s[proc] = 0; 4760 } else { 4761 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4762 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4763 } 4764 if (len_s[proc]) { 4765 merge->nsend++; 4766 nrows = 0; 4767 for (i=owners[proc]; i<owners[proc+1]; i++) { 4768 if (ai[i+1] > ai[i]) nrows++; 4769 } 4770 len_si[proc] = 2*(nrows+1); 4771 len += len_si[proc]; 4772 } 4773 } 4774 4775 /* determine the number and length of messages to receive for ij-structure */ 4776 /*-------------------------------------------------------------------------*/ 4777 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4778 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4779 4780 /* post the Irecv of j-structure */ 4781 
/*-------------------------------*/ 4782 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4783 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4784 4785 /* post the Isend of j-structure */ 4786 /*--------------------------------*/ 4787 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4788 4789 for (proc=0, k=0; proc<size; proc++) { 4790 if (!len_s[proc]) continue; 4791 i = owners[proc]; 4792 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRMPI(ierr); 4793 k++; 4794 } 4795 4796 /* receives and sends of j-structure are complete */ 4797 /*------------------------------------------------*/ 4798 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRMPI(ierr);} 4799 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRMPI(ierr);} 4800 4801 /* send and recv i-structure */ 4802 /*---------------------------*/ 4803 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4804 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4805 4806 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4807 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4808 for (proc=0,k=0; proc<size; proc++) { 4809 if (!len_s[proc]) continue; 4810 /* form outgoing message for i-structure: 4811 buf_si[0]: nrows to be sent 4812 [1:nrows]: row index (global) 4813 [nrows+1:2*nrows+1]: i-structure index 4814 */ 4815 /*-------------------------------------------*/ 4816 nrows = len_si[proc]/2 - 1; 4817 buf_si_i = buf_si + nrows+1; 4818 buf_si[0] = nrows; 4819 buf_si_i[0] = 0; 4820 nrows = 0; 4821 for (i=owners[proc]; i<owners[proc+1]; i++) { 4822 anzi = ai[i+1] - ai[i]; 4823 if (anzi) { 4824 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4825 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4826 nrows++; 4827 } 4828 } 4829 ierr = 
MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRMPI(ierr); 4830 k++; 4831 buf_si += len_si[proc]; 4832 } 4833 4834 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRMPI(ierr);} 4835 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRMPI(ierr);} 4836 4837 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4838 for (i=0; i<merge->nrecv; i++) { 4839 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4840 } 4841 4842 ierr = PetscFree(len_si);CHKERRQ(ierr); 4843 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4844 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4845 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4846 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4847 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4848 ierr = PetscFree(status);CHKERRQ(ierr); 4849 4850 /* compute a local seq matrix in each processor */ 4851 /*----------------------------------------------*/ 4852 /* allocate bi array and free space for accumulating nonzero column info */ 4853 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4854 bi[0] = 0; 4855 4856 /* create and initialize a linked list */ 4857 nlnk = N+1; 4858 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4859 4860 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4861 len = ai[owners[rank+1]] - ai[owners[rank]]; 4862 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4863 4864 current_space = free_space; 4865 4866 /* determine symbolic info for each local row */ 4867 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4868 4869 for (k=0; k<merge->nrecv; k++) { 4870 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4871 nrows = *buf_ri_k[k]; 4872 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4873 nextai[k] = buf_ri_k[k] + (nrows + 
1); /* poins to the next i-structure of k-th recved i-structure */ 4874 } 4875 4876 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4877 len = 0; 4878 for (i=0; i<m; i++) { 4879 bnzi = 0; 4880 /* add local non-zero cols of this proc's seqmat into lnk */ 4881 arow = owners[rank] + i; 4882 anzi = ai[arow+1] - ai[arow]; 4883 aj = a->j + ai[arow]; 4884 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4885 bnzi += nlnk; 4886 /* add received col data into lnk */ 4887 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4888 if (i == *nextrow[k]) { /* i-th row */ 4889 anzi = *(nextai[k]+1) - *nextai[k]; 4890 aj = buf_rj[k] + *nextai[k]; 4891 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4892 bnzi += nlnk; 4893 nextrow[k]++; nextai[k]++; 4894 } 4895 } 4896 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4897 4898 /* if free space is not available, make more free space */ 4899 if (current_space->local_remaining<bnzi) { 4900 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4901 nspacedouble++; 4902 } 4903 /* copy data into free space, then initialize lnk */ 4904 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4905 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4906 4907 current_space->array += bnzi; 4908 current_space->local_used += bnzi; 4909 current_space->local_remaining -= bnzi; 4910 4911 bi[i+1] = bi[i] + bnzi; 4912 } 4913 4914 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4915 4916 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4917 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4918 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4919 4920 /* create symbolic parallel matrix B_mpi */ 4921 /*---------------------------------------*/ 4922 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4923 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4924 if 
(n==PETSC_DECIDE) { 4925 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4926 } else { 4927 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4928 } 4929 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4930 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4931 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4932 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4933 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4934 4935 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4936 B_mpi->assembled = PETSC_FALSE; 4937 merge->bi = bi; 4938 merge->bj = bj; 4939 merge->buf_ri = buf_ri; 4940 merge->buf_rj = buf_rj; 4941 merge->coi = NULL; 4942 merge->coj = NULL; 4943 merge->owners_co = NULL; 4944 4945 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4946 4947 /* attach the supporting struct to B_mpi for reuse */ 4948 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4949 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4950 ierr = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr); 4951 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4952 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4953 *mpimat = B_mpi; 4954 4955 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4956 PetscFunctionReturn(0); 4957 } 4958 4959 /*@C 4960 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4961 matrices from each processor 4962 4963 Collective 4964 4965 Input Parameters: 4966 + comm - the communicators the parallel matrix will live on 4967 . seqmat - the input sequential matrices 4968 . m - number of local rows (or PETSC_DECIDE) 4969 . n - number of local columns (or PETSC_DECIDE) 4970 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4971 4972 Output Parameter: 4973 . 
mpimat - the parallel matrix generated

   Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;
  PetscMPIInt    nproc;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&nproc);CHKERRMPI(ierr);
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (nproc == 1) {
    /* Single process: the "sum" is just the sequential matrix itself */
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
  } else {
    /* Symbolic phase builds the parallel nonzero structure (only for a new matrix);
       the numeric phase then sums the values into it */
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
    }
    ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
       mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
       with MatGetSize()

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.
A_loc - the local sequential matrix generated 5021 5022 Level: developer 5023 5024 Notes: 5025 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 5026 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 5027 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 5028 modify the values of the returned A_loc. 5029 5030 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge() 5031 @*/ 5032 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5033 { 5034 PetscErrorCode ierr; 5035 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5036 Mat_SeqAIJ *mat,*a,*b; 5037 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5038 const PetscScalar *aa,*ba,*aav,*bav; 5039 PetscScalar *ca,*cam; 5040 PetscMPIInt size; 5041 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5042 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5043 PetscBool match; 5044 5045 PetscFunctionBegin; 5046 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5047 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5048 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 5049 if (size == 1) { 5050 if (scall == MAT_INITIAL_MATRIX) { 5051 ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr); 5052 *A_loc = mpimat->A; 5053 } else if (scall == MAT_REUSE_MATRIX) { 5054 ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5055 } 5056 PetscFunctionReturn(0); 5057 } 5058 5059 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5060 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5061 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5062 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5063 ierr = MatSeqAIJGetArrayRead(mpimat->A,&aav);CHKERRQ(ierr); 5064 
ierr = MatSeqAIJGetArrayRead(mpimat->B,&bav);CHKERRQ(ierr); 5065 aa = aav; 5066 ba = bav; 5067 if (scall == MAT_INITIAL_MATRIX) { 5068 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5069 ci[0] = 0; 5070 for (i=0; i<am; i++) { 5071 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5072 } 5073 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5074 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5075 k = 0; 5076 for (i=0; i<am; i++) { 5077 ncols_o = bi[i+1] - bi[i]; 5078 ncols_d = ai[i+1] - ai[i]; 5079 /* off-diagonal portion of A */ 5080 for (jo=0; jo<ncols_o; jo++) { 5081 col = cmap[*bj]; 5082 if (col >= cstart) break; 5083 cj[k] = col; bj++; 5084 ca[k++] = *ba++; 5085 } 5086 /* diagonal portion of A */ 5087 for (j=0; j<ncols_d; j++) { 5088 cj[k] = cstart + *aj++; 5089 ca[k++] = *aa++; 5090 } 5091 /* off-diagonal portion of A */ 5092 for (j=jo; j<ncols_o; j++) { 5093 cj[k] = cmap[*bj++]; 5094 ca[k++] = *ba++; 5095 } 5096 } 5097 /* put together the new matrix */ 5098 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5099 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5100 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5101 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5102 mat->free_a = PETSC_TRUE; 5103 mat->free_ij = PETSC_TRUE; 5104 mat->nonew = 0; 5105 } else if (scall == MAT_REUSE_MATRIX) { 5106 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5107 #if defined(PETSC_USE_DEVICE) 5108 (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU; 5109 #endif 5110 ci = mat->i; cj = mat->j; cam = mat->a; 5111 for (i=0; i<am; i++) { 5112 /* off-diagonal portion of A */ 5113 ncols_o = bi[i+1] - bi[i]; 5114 for (jo=0; jo<ncols_o; jo++) { 5115 col = cmap[*bj]; 5116 if (col >= cstart) break; 5117 *cam++ = *ba++; bj++; 5118 } 5119 /* diagonal portion of A */ 5120 ncols_d = ai[i+1] - ai[i]; 5121 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5122 /* off-diagonal portion of A */ 5123 for (j=jo; j<ncols_o; j++) { 5124 *cam++ = *ba++; bj++; 5125 } 5126 } 5127 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5128 ierr = MatSeqAIJRestoreArrayRead(mpimat->A,&aav);CHKERRQ(ierr); 5129 ierr = MatSeqAIJRestoreArrayRead(mpimat->B,&bav);CHKERRQ(ierr); 5130 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5131 PetscFunctionReturn(0); 5132 } 5133 5134 /*@ 5135 MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5136 mlocal rows and n columns. 
Where n is the sum of the number of columns of the diagonal and offdiagonal part 5137 5138 Not Collective 5139 5140 Input Parameters: 5141 + A - the matrix 5142 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5143 5144 Output Parameter: 5145 + glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5146 - A_loc - the local sequential matrix generated 5147 5148 Level: developer 5149 5150 Notes: 5151 This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering) 5152 5153 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed() 5154 5155 @*/ 5156 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc) 5157 { 5158 PetscErrorCode ierr; 5159 Mat Ao,Ad; 5160 const PetscInt *cmap; 5161 PetscMPIInt size; 5162 PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*); 5163 5164 PetscFunctionBegin; 5165 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap);CHKERRQ(ierr); 5166 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 5167 if (size == 1) { 5168 if (scall == MAT_INITIAL_MATRIX) { 5169 ierr = PetscObjectReference((PetscObject)Ad);CHKERRQ(ierr); 5170 *A_loc = Ad; 5171 } else if (scall == MAT_REUSE_MATRIX) { 5172 ierr = MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5173 } 5174 if (glob) { ierr = ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob);CHKERRQ(ierr); } 5175 PetscFunctionReturn(0); 5176 } 5177 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f);CHKERRQ(ierr); 5178 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5179 if (f) { 5180 ierr = (*f)(A,scall,glob,A_loc);CHKERRQ(ierr); 5181 } else { 5182 Mat_SeqAIJ *a = (Mat_SeqAIJ*)Ad->data; 5183 Mat_SeqAIJ *b = (Mat_SeqAIJ*)Ao->data; 5184 Mat_SeqAIJ *c; 
5185 PetscInt *ai = a->i, *aj = a->j; 5186 PetscInt *bi = b->i, *bj = b->j; 5187 PetscInt *ci,*cj; 5188 const PetscScalar *aa,*ba; 5189 PetscScalar *ca; 5190 PetscInt i,j,am,dn,on; 5191 5192 ierr = MatGetLocalSize(Ad,&am,&dn);CHKERRQ(ierr); 5193 ierr = MatGetLocalSize(Ao,NULL,&on);CHKERRQ(ierr); 5194 ierr = MatSeqAIJGetArrayRead(Ad,&aa);CHKERRQ(ierr); 5195 ierr = MatSeqAIJGetArrayRead(Ao,&ba);CHKERRQ(ierr); 5196 if (scall == MAT_INITIAL_MATRIX) { 5197 PetscInt k; 5198 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5199 ierr = PetscMalloc1(ai[am]+bi[am],&cj);CHKERRQ(ierr); 5200 ierr = PetscMalloc1(ai[am]+bi[am],&ca);CHKERRQ(ierr); 5201 ci[0] = 0; 5202 for (i=0,k=0; i<am; i++) { 5203 const PetscInt ncols_o = bi[i+1] - bi[i]; 5204 const PetscInt ncols_d = ai[i+1] - ai[i]; 5205 ci[i+1] = ci[i] + ncols_o + ncols_d; 5206 /* diagonal portion of A */ 5207 for (j=0; j<ncols_d; j++,k++) { 5208 cj[k] = *aj++; 5209 ca[k] = *aa++; 5210 } 5211 /* off-diagonal portion of A */ 5212 for (j=0; j<ncols_o; j++,k++) { 5213 cj[k] = dn + *bj++; 5214 ca[k] = *ba++; 5215 } 5216 } 5217 /* put together the new matrix */ 5218 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc);CHKERRQ(ierr); 5219 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5220 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5221 c = (Mat_SeqAIJ*)(*A_loc)->data; 5222 c->free_a = PETSC_TRUE; 5223 c->free_ij = PETSC_TRUE; 5224 c->nonew = 0; 5225 ierr = MatSetType(*A_loc,((PetscObject)Ad)->type_name);CHKERRQ(ierr); 5226 } else if (scall == MAT_REUSE_MATRIX) { 5227 #if defined(PETSC_HAVE_DEVICE) 5228 (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU; 5229 #endif 5230 c = (Mat_SeqAIJ*)(*A_loc)->data; 5231 ca = c->a; 5232 for (i=0; i<am; i++) { 5233 const PetscInt ncols_d = ai[i+1] - ai[i]; 5234 const PetscInt ncols_o = bi[i+1] - bi[i]; 5235 /* diagonal portion of A */ 5236 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 5237 /* off-diagonal portion of A */ 5238 for (j=0; j<ncols_o; j++) *ca++ = *ba++; 5239 } 5240 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5241 ierr = MatSeqAIJRestoreArrayRead(Ad,&aa);CHKERRQ(ierr); 5242 ierr = MatSeqAIJRestoreArrayRead(Ao,&aa);CHKERRQ(ierr); 5243 if (glob) { 5244 PetscInt cst, *gidx; 5245 5246 ierr = MatGetOwnershipRangeColumn(A,&cst,NULL);CHKERRQ(ierr); 5247 ierr = PetscMalloc1(dn+on,&gidx);CHKERRQ(ierr); 5248 for (i=0; i<dn; i++) gidx[i] = cst + i; 5249 for (i=0; i<on; i++) gidx[i+dn] = cmap[i]; 5250 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob);CHKERRQ(ierr); 5251 } 5252 } 5253 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5254 PetscFunctionReturn(0); 5255 } 5256 5257 /*@C 5258 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5259 5260 Not Collective 5261 5262 Input Parameters: 5263 + A - the matrix 5264 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5265 - row, col - index sets of rows and columns to extract (or NULL) 5266 5267 Output Parameter: 5268 . 
A_loc - the local sequential matrix generated

   Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row) {
    /* default row set: all locally owned rows */
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* default column set: the nonzero columns, in ascending global order --
       off-diagonal columns below the diagonal block, then the owned columns,
       then the remaining off-diagonal columns (garray is assumed sorted) */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices() expects an array of matrices on reuse */
    ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  if (!col) { /* attach global id of condensed columns */
    ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
  }
  *A_loc = aloc[0];
  ierr = PetscFree(aloc);CHKERRQ(ierr);
  if (!row) {
    ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
  }
  if (!col) {
    ierr = ISDestroy(&iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
 * Create a sequential AIJ matrix based on row indices: a whole row is extracted once
 * it is matched. Rows could be local or remote. The routine is designed to be scalable
 * in memory so that nothing is sized by the global problem size.
 * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
{
  Mat_MPIAIJ             *p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ             *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
  PetscInt               plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
  PetscMPIInt            owner;
  PetscSFNode            *iremote,*oiremote;
  const PetscInt         *lrowindices;
  PetscErrorCode         ierr;
  PetscSF                sf,osf;
  PetscInt               pcstart,*roffsets,*loffsets,*pnnz,j;
  PetscInt               ontotalcols,dntotalcols,ntotalcols,nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
  ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
  ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
  ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
  for (i=0;i<nrows;i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);

  /* Per-row counts and running offsets, stored as (diag,offdiag) pairs */
  ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
  ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
  ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i=0;i<plocalsize;i++) {
    /* diag */
    nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
    /* off diag */
    nrcols[i*2+1] = po->i[i+1] - po->i[i];
    /* compute offsets so that we know the relative location of each row */
    roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
    roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
  }
  ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
  ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
  /* 'r' means root, and 'l' means leaf */
  ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  ierr = PetscFree(roffsets);CHKERRQ(ierr);
  ierr = PetscFree(nrcols);CHKERRQ(ierr);
  dntotalcols = 0;
  ontotalcols = 0;
  ncol = 0;
  for (i=0;i<nrows;i++) {
    pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
    ncol = PetscMax(pnnz[i],ncol);
    /* diag */
    dntotalcols += nlcols[i*2+0];
    /* off diag */
    ontotalcols += nlcols[i*2+1];
  }
  /* We do not need to figure out the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
  ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
  ierr = PetscFree(pnnz);CHKERRQ(ierr);
  p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
  /* Build one SF per block (diag/off-diag) that maps root nonzeros to leaf nonzeros */
  /* diag */
  ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
  /* off diag */
  ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
  /* diag */
  ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
  /* off diag */
  ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i=0;i<nrows;i++) {
    owner = 0;
    ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
    /* Set iremote for diag matrix */
    for (j=0;j<nlcols[i*2+0];j++) {
      iremote[dntotalcols].index = loffsets[i*2+0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so ilocal needs to point into the single contiguous array */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off diag */
    for (j=0;j<nlcols[i*2+1];j++) {
      oiremote[ontotalcols].index = loffsets[i*2+1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++] = ntotalcols++;
    }
  }
  ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
  ierr = PetscFree(loffsets);CHKERRQ(ierr);
  ierr = PetscFree(nlcols);CHKERRQ(ierr);
  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);

  ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
  /* Off diag */
  ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
  /* We operate on the matrix internal data to save memory */
  ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
  /* Convert to global indices for diag matrix (temporarily mutates pd->j; undone below) */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
  ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
  /* We want P_oth to store global indices */
  ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
  /* Use memory scalable approach */
  ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
  ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
  /* Convert back to local indices */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
  ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
  nout = 0;
  /* restore po->j to local indices as well */
  ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
  if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout);
  ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
  /* Exchange values */
  ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  /* Stop PETSc from shrinking memory */
  for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
  ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
 * This supports MPIAIJ and MAIJ
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ     *p_oth;
  Mat_SeqAIJ     *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
  IS             rows,map;
  PetscHMapI     hamp;
  PetscInt       i,htsize,*rowindices,off,*mapping,key,count;
  MPI_Comm       comm;
  PetscSF        sf,osf;
  PetscBool      has;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   * and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
    ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
    ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i=0;i<a->B->cmap->n;i++) {
      key  = a->garray[i]/dof;
      ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
      if (!has) {
        mapping[i] = count;
        ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
      } else {
        /* Current 'i' maps to the same key as the previous step */
        mapping[i] = count-1;
      }
    }
    ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
    ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
    if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);
    ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
    off = 0;
    ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
    ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
    ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
    /* In case the matrix was already created but users want to recreate the matrix */
    ierr = MatDestroy(P_oth);CHKERRQ(ierr);
    ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
    ierr = ISDestroy(&map);CHKERRQ(ierr);
    ierr = ISDestroy(&rows);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that were attached to the matrix earlier.
     * */
    ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
    ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
    if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
    /* Update values in place */
    ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
    ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5584 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5585 5586 Output Parameter: 5587 + rowb, colb - index sets of rows and columns of B to extract 5588 - B_seq - the sequential matrix generated 5589 5590 Level: developer 5591 5592 @*/ 5593 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5594 { 5595 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5596 PetscErrorCode ierr; 5597 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5598 IS isrowb,iscolb; 5599 Mat *bseq=NULL; 5600 5601 PetscFunctionBegin; 5602 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5603 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5604 } 5605 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5606 5607 if (scall == MAT_INITIAL_MATRIX) { 5608 start = A->cmap->rstart; 5609 cmap = a->garray; 5610 nzA = a->A->cmap->n; 5611 nzB = a->B->cmap->n; 5612 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5613 ncols = 0; 5614 for (i=0; i<nzB; i++) { /* row < local row index */ 5615 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5616 else break; 5617 } 5618 imark = i; 5619 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5620 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5621 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5622 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5623 } else { 5624 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5625 isrowb = *rowb; iscolb = *colb; 5626 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5627 bseq[0] = *B_seq; 5628 } 5629 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5630 *B_seq = 
bseq[0]; 5631 ierr = PetscFree(bseq);CHKERRQ(ierr); 5632 if (!rowb) { 5633 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5634 } else { 5635 *rowb = isrowb; 5636 } 5637 if (!colb) { 5638 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5639 } else { 5640 *colb = iscolb; 5641 } 5642 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5643 PetscFunctionReturn(0); 5644 } 5645 5646 /* 5647 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5648 of the OFF-DIAGONAL portion of local A 5649 5650 Collective on Mat 5651 5652 Input Parameters: 5653 + A,B - the matrices in mpiaij format 5654 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5655 5656 Output Parameter: 5657 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5658 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5659 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5660 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5661 5662 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5663 for this matrix. This is not desirable.. 
5664 5665 Level: developer 5666 5667 */ 5668 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5669 { 5670 PetscErrorCode ierr; 5671 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5672 Mat_SeqAIJ *b_oth; 5673 VecScatter ctx; 5674 MPI_Comm comm; 5675 const PetscMPIInt *rprocs,*sprocs; 5676 const PetscInt *srow,*rstarts,*sstarts; 5677 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5678 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5679 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5680 MPI_Request *rwaits = NULL,*swaits = NULL; 5681 MPI_Status rstatus; 5682 PetscMPIInt size,tag,rank,nsends_mpi,nrecvs_mpi; 5683 PETSC_UNUSED PetscMPIInt jj; 5684 5685 PetscFunctionBegin; 5686 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5687 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 5688 5689 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5690 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5691 } 5692 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5693 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 5694 5695 if (size == 1) { 5696 startsj_s = NULL; 5697 bufa_ptr = NULL; 5698 *B_oth = NULL; 5699 PetscFunctionReturn(0); 5700 } 5701 5702 ctx = a->Mvctx; 5703 tag = ((PetscObject)ctx)->tag; 5704 5705 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5706 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5707 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5708 ierr = 
PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5709 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5710 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5711 5712 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5713 if (scall == MAT_INITIAL_MATRIX) { 5714 /* i-array */ 5715 /*---------*/ 5716 /* post receives */ 5717 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5718 for (i=0; i<nrecvs; i++) { 5719 rowlen = rvalues + rstarts[i]*rbs; 5720 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5721 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5722 } 5723 5724 /* pack the outgoing message */ 5725 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5726 5727 sstartsj[0] = 0; 5728 rstartsj[0] = 0; 5729 len = 0; /* total length of j or a array to be sent */ 5730 if (nsends) { 5731 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5732 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5733 } 5734 for (i=0; i<nsends; i++) { 5735 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5736 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5737 for (j=0; j<nrows; j++) { 5738 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5739 for (l=0; l<sbs; l++) { 5740 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5741 5742 rowlen[j*sbs+l] = ncols; 5743 5744 len += ncols; 5745 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5746 } 5747 k++; 5748 } 5749 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5750 5751 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5752 } 5753 /* recvs and sends of i-array are completed */ 5754 i = nrecvs; 5755 while (i--) { 5756 ierr = 
MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr); 5757 } 5758 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5759 ierr = PetscFree(svalues);CHKERRQ(ierr); 5760 5761 /* allocate buffers for sending j and a arrays */ 5762 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5763 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5764 5765 /* create i-array of B_oth */ 5766 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5767 5768 b_othi[0] = 0; 5769 len = 0; /* total length of j or a array to be received */ 5770 k = 0; 5771 for (i=0; i<nrecvs; i++) { 5772 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5773 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5774 for (j=0; j<nrows; j++) { 5775 b_othi[k+1] = b_othi[k] + rowlen[j]; 5776 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5777 k++; 5778 } 5779 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5780 } 5781 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5782 5783 /* allocate space for j and a arrrays of B_oth */ 5784 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5785 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5786 5787 /* j-array */ 5788 /*---------*/ 5789 /* post receives of j-array */ 5790 for (i=0; i<nrecvs; i++) { 5791 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5792 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5793 } 5794 5795 /* pack the outgoing message j-array */ 5796 if (nsends) k = sstarts[0]; 5797 for (i=0; i<nsends; i++) { 5798 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5799 bufJ = bufj+sstartsj[i]; 5800 for (j=0; j<nrows; j++) { 5801 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5802 for (ll=0; ll<sbs; ll++) { 5803 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5804 for (l=0; l<ncols; l++) { 5805 *bufJ++ = cols[l]; 5806 } 5807 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5808 } 5809 } 5810 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5811 } 5812 5813 /* recvs and sends of j-array are completed */ 5814 i = nrecvs; 5815 while (i--) { 5816 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr); 5817 } 5818 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5819 } else if (scall == MAT_REUSE_MATRIX) { 5820 sstartsj = *startsj_s; 5821 rstartsj = *startsj_r; 5822 bufa = *bufa_ptr; 5823 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5824 b_otha = b_oth->a; 5825 #if defined(PETSC_HAVE_DEVICE) 5826 (*B_oth)->offloadmask = PETSC_OFFLOAD_CPU; 5827 #endif 5828 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5829 5830 /* a-array */ 5831 /*---------*/ 5832 /* post receives of a-array */ 5833 for (i=0; i<nrecvs; i++) { 5834 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5835 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5836 } 5837 5838 /* pack the outgoing message a-array */ 5839 if (nsends) k = sstarts[0]; 5840 for (i=0; i<nsends; i++) { 5841 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5842 bufA = bufa+sstartsj[i]; 5843 for (j=0; j<nrows; j++) { 5844 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5845 for (ll=0; ll<sbs; ll++) { 5846 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5847 for (l=0; l<ncols; l++) { 5848 *bufA++ = vals[l]; 5849 } 5850 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5851 } 5852 } 5853 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5854 } 5855 /* recvs and sends of a-array are completed */ 5856 i = nrecvs; 5857 while (i--) { 5858 ierr = 
MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr); 5859 } 5860 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5861 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5862 5863 if (scall == MAT_INITIAL_MATRIX) { 5864 /* put together the new matrix */ 5865 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5866 5867 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5868 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5869 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5870 b_oth->free_a = PETSC_TRUE; 5871 b_oth->free_ij = PETSC_TRUE; 5872 b_oth->nonew = 0; 5873 5874 ierr = PetscFree(bufj);CHKERRQ(ierr); 5875 if (!startsj_s || !bufa_ptr) { 5876 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5877 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5878 } else { 5879 *startsj_s = sstartsj; 5880 *startsj_r = rstartsj; 5881 *bufa_ptr = bufa; 5882 } 5883 } 5884 5885 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5886 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5887 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5888 PetscFunctionReturn(0); 5889 } 5890 5891 /*@C 5892 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5893 5894 Not Collective 5895 5896 Input Parameters: 5897 . A - The matrix in mpiaij format 5898 5899 Output Parameter: 5900 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5901 . 
colmap - A map from global column index to local index into lvec 5902 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5903 5904 Level: developer 5905 5906 @*/ 5907 #if defined(PETSC_USE_CTABLE) 5908 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5909 #else 5910 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5911 #endif 5912 { 5913 Mat_MPIAIJ *a; 5914 5915 PetscFunctionBegin; 5916 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5917 PetscValidPointer(lvec, 2); 5918 PetscValidPointer(colmap, 3); 5919 PetscValidPointer(multScatter, 4); 5920 a = (Mat_MPIAIJ*) A->data; 5921 if (lvec) *lvec = a->lvec; 5922 if (colmap) *colmap = a->colmap; 5923 if (multScatter) *multScatter = a->Mvctx; 5924 PetscFunctionReturn(0); 5925 } 5926 5927 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5928 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5929 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5930 #if defined(PETSC_HAVE_MKL_SPARSE) 5931 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5932 #endif 5933 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5934 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5935 #if defined(PETSC_HAVE_ELEMENTAL) 5936 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5937 #endif 5938 #if defined(PETSC_HAVE_SCALAPACK) 5939 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 5940 #endif 5941 #if defined(PETSC_HAVE_HYPRE) 5942 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5943 #endif 5944 #if defined(PETSC_HAVE_CUDA) 5945 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 5946 
#endif 5947 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 5948 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*); 5949 #endif 5950 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5951 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5952 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 5953 5954 /* 5955 Computes (B'*A')' since computing B*A directly is untenable 5956 5957 n p p 5958 [ ] [ ] [ ] 5959 m [ A ] * n [ B ] = m [ C ] 5960 [ ] [ ] [ ] 5961 5962 */ 5963 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5964 { 5965 PetscErrorCode ierr; 5966 Mat At,Bt,Ct; 5967 5968 PetscFunctionBegin; 5969 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5970 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5971 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr); 5972 ierr = MatDestroy(&At);CHKERRQ(ierr); 5973 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5974 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5975 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5976 PetscFunctionReturn(0); 5977 } 5978 5979 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 5980 { 5981 PetscErrorCode ierr; 5982 PetscBool cisdense; 5983 5984 PetscFunctionBegin; 5985 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5986 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 5987 ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 5988 ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr); 5989 if (!cisdense) { 5990 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 5991 } 5992 ierr = MatSetUp(C);CHKERRQ(ierr); 5993 5994 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5995 PetscFunctionReturn(0); 5996 } 
5997 5998 /* ----------------------------------------------------------------*/ 5999 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6000 { 6001 Mat_Product *product = C->product; 6002 Mat A = product->A,B=product->B; 6003 6004 PetscFunctionBegin; 6005 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 6006 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 6007 6008 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6009 C->ops->productsymbolic = MatProductSymbolic_AB; 6010 PetscFunctionReturn(0); 6011 } 6012 6013 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6014 { 6015 PetscErrorCode ierr; 6016 Mat_Product *product = C->product; 6017 6018 PetscFunctionBegin; 6019 if (product->type == MATPRODUCT_AB) { 6020 ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr); 6021 } 6022 PetscFunctionReturn(0); 6023 } 6024 /* ----------------------------------------------------------------*/ 6025 6026 /*MC 6027 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6028 6029 Options Database Keys: 6030 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 6031 6032 Level: beginner 6033 6034 Notes: 6035 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values, 6036 in this case the values associated with the rows and columns one passes in are set to zero 6037 in the matrix 6038 6039 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. 
In this no 6040 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 6041 6042 .seealso: MatCreateAIJ() 6043 M*/ 6044 6045 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6046 { 6047 Mat_MPIAIJ *b; 6048 PetscErrorCode ierr; 6049 PetscMPIInt size; 6050 6051 PetscFunctionBegin; 6052 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 6053 6054 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 6055 B->data = (void*)b; 6056 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 6057 B->assembled = PETSC_FALSE; 6058 B->insertmode = NOT_SET_VALUES; 6059 b->size = size; 6060 6061 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr); 6062 6063 /* build cache for off array entries formed */ 6064 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 6065 6066 b->donotstash = PETSC_FALSE; 6067 b->colmap = NULL; 6068 b->garray = NULL; 6069 b->roworiented = PETSC_TRUE; 6070 6071 /* stuff used for matrix vector multiply */ 6072 b->lvec = NULL; 6073 b->Mvctx = NULL; 6074 6075 /* stuff for MatGetRow() */ 6076 b->rowindices = NULL; 6077 b->rowvalues = NULL; 6078 b->getrowactive = PETSC_FALSE; 6079 6080 /* flexible pointer used in CUSPARSE classes */ 6081 b->spptr = NULL; 6082 6083 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 6084 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 6085 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 6086 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 6087 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 6088 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 6089 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 6090 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 6091 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 6092 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr); 6093 #if defined(PETSC_HAVE_CUDA) 6094 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr); 6095 #endif 6096 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6097 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr); 6098 #endif 6099 #if defined(PETSC_HAVE_MKL_SPARSE) 6100 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 6101 #endif 6102 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 6103 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr); 6104 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 6105 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);CHKERRQ(ierr); 6106 #if defined(PETSC_HAVE_ELEMENTAL) 6107 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 6108 #endif 6109 #if defined(PETSC_HAVE_SCALAPACK) 6110 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr); 6111 #endif 6112 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 6113 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 6114 #if defined(PETSC_HAVE_HYPRE) 6115 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 6116 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr); 6117 #endif 6118 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr); 6119 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr); 6120 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 6121 PetscFunctionReturn(0); 6122 } 6123 6124 /*@C 6125 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 6126 and "off-diagonal" part of the matrix in CSR format. 6127 6128 Collective 6129 6130 Input Parameters: 6131 + comm - MPI communicator 6132 . m - number of local rows (Cannot be PETSC_DECIDE) 6133 . n - This value should be the same as the local size used in creating the 6134 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 6135 calculated if N is given) For square matrices n is almost always m. 6136 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 6137 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 6138 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6139 . j - column indices 6140 . 
a - matrix values 6141 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6142 . oj - column indices 6143 - oa - matrix values 6144 6145 Output Parameter: 6146 . mat - the matrix 6147 6148 Level: advanced 6149 6150 Notes: 6151 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6152 must free the arrays once the matrix has been destroyed and not before. 6153 6154 The i and j indices are 0 based 6155 6156 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6157 6158 This sets local rows and cannot be used to set off-processor values. 6159 6160 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6161 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6162 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6163 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6164 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6165 communication if it is known that only local entries will be set. 
6166 6167 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 6168 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 6169 @*/ 6170 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 6171 { 6172 PetscErrorCode ierr; 6173 Mat_MPIAIJ *maij; 6174 6175 PetscFunctionBegin; 6176 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6177 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 6178 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 6179 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 6180 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 6181 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 6182 maij = (Mat_MPIAIJ*) (*mat)->data; 6183 6184 (*mat)->preallocated = PETSC_TRUE; 6185 6186 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 6187 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 6188 6189 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 6190 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 6191 6192 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6193 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6194 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6195 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6196 6197 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 6198 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6199 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6200 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 6201 ierr = 
MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6202 PetscFunctionReturn(0); 6203 } 6204 6205 /* 6206 Special version for direct calls from Fortran 6207 */ 6208 #include <petsc/private/fortranimpl.h> 6209 6210 /* Change these macros so can be used in void function */ 6211 #undef CHKERRQ 6212 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 6213 #undef SETERRQ2 6214 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 6215 #undef SETERRQ3 6216 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 6217 #undef SETERRQ 6218 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 6219 6220 #if defined(PETSC_HAVE_FORTRAN_CAPS) 6221 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 6222 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 6223 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 6224 #else 6225 #endif 6226 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 6227 { 6228 Mat mat = *mmat; 6229 PetscInt m = *mm, n = *mn; 6230 InsertMode addv = *maddv; 6231 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 6232 PetscScalar value; 6233 PetscErrorCode ierr; 6234 6235 MatCheckPreallocated(mat,1); 6236 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 6237 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 6238 { 6239 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 6240 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 6241 PetscBool roworiented = aij->roworiented; 6242 6243 /* Some Variables required in the macro */ 6244 Mat A = aij->A; 6245 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 6246 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 6247 MatScalar *aa = a->a; 6248 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? 
PETSC_TRUE : PETSC_FALSE); 6249 Mat B = aij->B; 6250 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 6251 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 6252 MatScalar *ba = b->a; 6253 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 6254 * cannot use "#if defined" inside a macro. */ 6255 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 6256 6257 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 6258 PetscInt nonew = a->nonew; 6259 MatScalar *ap1,*ap2; 6260 6261 PetscFunctionBegin; 6262 for (i=0; i<m; i++) { 6263 if (im[i] < 0) continue; 6264 if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 6265 if (im[i] >= rstart && im[i] < rend) { 6266 row = im[i] - rstart; 6267 lastcol1 = -1; 6268 rp1 = aj + ai[row]; 6269 ap1 = aa + ai[row]; 6270 rmax1 = aimax[row]; 6271 nrow1 = ailen[row]; 6272 low1 = 0; 6273 high1 = nrow1; 6274 lastcol2 = -1; 6275 rp2 = bj + bi[row]; 6276 ap2 = ba + bi[row]; 6277 rmax2 = bimax[row]; 6278 nrow2 = bilen[row]; 6279 low2 = 0; 6280 high2 = nrow2; 6281 6282 for (j=0; j<n; j++) { 6283 if (roworiented) value = v[i*n+j]; 6284 else value = v[i+j*m]; 6285 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 6286 if (in[j] >= cstart && in[j] < cend) { 6287 col = in[j] - cstart; 6288 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 6289 #if defined(PETSC_HAVE_DEVICE) 6290 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 6291 #endif 6292 } else if (in[j] < 0) continue; 6293 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 6294 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */ 6295 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 6296 } else { 6297 if (mat->was_assembled) { 6298 if (!aij->colmap) { 6299 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 6300 } 6301 #if defined(PETSC_USE_CTABLE) 6302 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 6303 col--; 6304 #else 6305 col = aij->colmap[in[j]] - 1; 6306 #endif 6307 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 6308 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 6309 col = in[j]; 6310 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 6311 B = aij->B; 6312 b = (Mat_SeqAIJ*)B->data; 6313 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 6314 rp2 = bj + bi[row]; 6315 ap2 = ba + bi[row]; 6316 rmax2 = bimax[row]; 6317 nrow2 = bilen[row]; 6318 low2 = 0; 6319 high2 = nrow2; 6320 bm = aij->B->rmap->n; 6321 ba = b->a; 6322 inserted = PETSC_FALSE; 6323 } 6324 } else col = in[j]; 6325 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 6326 #if defined(PETSC_HAVE_DEVICE) 6327 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 6328 #endif 6329 } 6330 } 6331 } else if (!aij->donotstash) { 6332 if (roworiented) { 6333 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6334 } else { 6335 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6336 } 6337 } 6338 } 6339 } 6340 PetscFunctionReturnVoid(); 6341 } 6342 6343 typedef struct { 6344 Mat *mp; /* intermediate products */ 6345 PetscBool *mptmp; /* is the intermediate product temporary ? 
*/ 6346 PetscInt cp; /* number of intermediate products */ 6347 6348 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6349 PetscInt *startsj_s,*startsj_r; 6350 PetscScalar *bufa; 6351 Mat P_oth; 6352 6353 /* may take advantage of merging product->B */ 6354 Mat Bloc; 6355 6356 /* cusparse does not have support to split between symbolic and numeric phases 6357 When api_user is true, we don't need to update the numerical values 6358 of the temporary storage */ 6359 PetscBool reusesym; 6360 6361 /* support for COO values insertion */ 6362 PetscScalar *coo_v,*coo_w; 6363 PetscInt **own; 6364 PetscInt **off; 6365 PetscBool hasoffproc; /* if true, non-local values insertion (i.e. AtB or PtAP) */ 6366 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6367 PetscMemType mtype; 6368 6369 /* customization */ 6370 PetscBool abmerge; 6371 PetscBool P_oth_bind; 6372 } MatMatMPIAIJBACKEND; 6373 6374 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6375 { 6376 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data; 6377 PetscInt i; 6378 PetscErrorCode ierr; 6379 6380 PetscFunctionBegin; 6381 ierr = PetscFree2(mmdata->startsj_s,mmdata->startsj_r);CHKERRQ(ierr); 6382 ierr = PetscFree(mmdata->bufa);CHKERRQ(ierr); 6383 ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v);CHKERRQ(ierr); 6384 ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w);CHKERRQ(ierr); 6385 ierr = MatDestroy(&mmdata->P_oth);CHKERRQ(ierr); 6386 ierr = MatDestroy(&mmdata->Bloc);CHKERRQ(ierr); 6387 ierr = PetscSFDestroy(&mmdata->sf);CHKERRQ(ierr); 6388 for (i = 0; i < mmdata->cp; i++) { 6389 ierr = MatDestroy(&mmdata->mp[i]);CHKERRQ(ierr); 6390 } 6391 ierr = PetscFree(mmdata->mp);CHKERRQ(ierr); 6392 ierr = PetscFree(mmdata->mptmp);CHKERRQ(ierr); 6393 ierr = PetscFree(mmdata->own[0]);CHKERRQ(ierr); 6394 ierr = PetscFree(mmdata->own);CHKERRQ(ierr); 6395 ierr = PetscFree(mmdata->off[0]);CHKERRQ(ierr); 6396 ierr = PetscFree(mmdata->off);CHKERRQ(ierr); 6397 ierr = 
PetscFree(mmdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Copy (a subset of) the stored values of a SeqAIJ matrix into the contiguous
   array v[]. When idx is given, v[j] = a[idx[j]] for j < n (a gather);
   otherwise the first n stored values are copied verbatim. If the matrix
   provides a "MatSeqAIJCopySubArray_C" method (e.g. a device-side
   implementation), that is used instead of the host fallback. */
static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
{
  PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(A,n,idx,v);CHKERRQ(ierr);
  } else {
    const PetscScalar *vv;

    ierr = MatSeqAIJGetArrayRead(A,&vv);CHKERRQ(ierr);
    if (n && idx) {
      /* gather through the index array */
      PetscScalar    *w  = v;
      const PetscInt *oi = idx;
      PetscInt       j;

      for (j = 0; j < n; j++) *w++ = vv[*oi++];
    } else {
      /* no index array: plain contiguous copy of the first n values */
      ierr = PetscArraycpy(v,vv,n);CHKERRQ(ierr);
    }
    ierr = MatSeqAIJRestoreArrayRead(A,&vv);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Numeric phase of the backend product: refresh the temporary matrices
   (P_oth, Bloc), recompute each sequential intermediate product, and scatter
   their values into the COO storage of C via MatSetValuesCOO() */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt            i,n_d,n_o;
  PetscErrorCode      ierr;

  PetscFunctionBegin;
  MatCheckProduct(C,1);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) {
      ierr = MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
    }
    if (mmdata->Bloc) {
      ierr = MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc);CHKERRQ(ierr);
    }
  }
  /* reusesym is only valid for the first numeric call right after the symbolic
     phase (see its setup from product->api_user); clear it so later calls do
     refresh the temporaries */
  mmdata->reusesym = PETSC_FALSE;

  for (i = 0; i < mmdata->cp; i++) {
    if (!mmdata->mp[i]->ops->productnumeric) SETERRQ1(PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]);
    ierr =
(*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]);CHKERRQ(ierr);
  }
  /* Gather the values of each non-temporary intermediate product into the COO
     arrays: coo_v collects locally owned entries, coo_w stages entries whose
     global row is owned by another process */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i+1] - mmdata->off[i];

    if (mmdata->mptmp[i]) continue;
    if (noff) {
      PetscInt nown = mmdata->own[i+1] - mmdata->own[i];

      ierr = MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o);CHKERRQ(ierr);
      ierr = MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d);CHKERRQ(ierr);
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;

      /* no off-process entries for this product: copy all stored values */
      ierr = MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d);CHKERRQ(ierr);
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    ierr = PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
  }
  ierr = MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Support for Pt * A, A * P, or Pt * A * P */
#define MAX_NUMBER_INTERMEDIATE 4
/* Symbolic phase of the backend product: split the requested MPI product into
   at most MAX_NUMBER_INTERMEDIATE sequential products, run their symbolic
   phases, and precompute the COO (i,j) coordinates used to assemble C.
   rmapt[]/cmapt[] record how each intermediate's local rows/columns map to
   global indices of C: -1 unused, 1 local-to-global by owned-range shift,
   2 through the explicit index arrays rmapa[]/cmapa[]. */
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product            *product = C->product;
  Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE];
  Mat_MPIAIJ             *a,*p;
  MatMatMPIAIJBACKEND    *mmdata;
  ISLocalToGlobalMapping P_oth_l2g = NULL;
  IS                     glob = NULL;
  const char             *prefix;
  char                   pprefix[256];
  const PetscInt         *globidx,*P_oth_idx;
  const PetscInt         *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE];
  PetscInt               cp = 0,m,n,M,N,ncoo,ncoo_d,ncoo_o,ncoo_oown,*coo_i,*coo_j,cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE],i,j;
  MatProductType         ptype;
  PetscBool              mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
  PetscMPIInt            size;
  PetscErrorCode         ierr;

  PetscFunctionBegin;
  MatCheckProduct(C,1);
  if (product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  ptype = product->type;
  /* with A symmetric, A^t * B can be computed as A * B */
  if (product->A->symmetric && ptype == MATPRODUCT_AtB) ptype = MATPRODUCT_AB;
  switch (ptype) {
  case MATPRODUCT_AB:
    A = product->A;
    P = product->B;
    m = A->rmap->n;
    n = P->cmap->n;
    M = A->rmap->N;
    N = P->cmap->N;
    break;
  case MATPRODUCT_AtB:
    P = product->A;
    A = product->B;
    m = P->cmap->n;
    n = A->cmap->n;
    M = P->cmap->N;
    N = A->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  case MATPRODUCT_PtAP:
    A = product->A;
    P = product->B;
    m = P->cmap->n;
    n = P->cmap->n;
    M = P->cmap->N;
    N = P->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
  }
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)C),&size);CHKERRMPI(ierr);
  if (size == 1) hasoffproc = PETSC_FALSE; /* uniprocessor: every row is local */

  /* defaults */
  for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
    mp[i]    = NULL;
    mptmp[i] = PETSC_FALSE;
    rmapt[i] = -1;
    cmapt[i] = -1;
    rmapa[i] = NULL;
    cmapa[i] = NULL;
  }

  /* customization */
  ierr = PetscNew(&mmdata);CHKERRQ(ierr);
  mmdata->reusesym = product->api_user;
  if (ptype == MATPRODUCT_AB) {
    if (product->api_user) {
      ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
      ierr = PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
      ierr = PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
      ierr = PetscOptionsEnd();CHKERRQ(ierr);
    } else {
      ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
      ierr = PetscOptionsBool("-matproduct_ab_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
      ierr = PetscOptionsBool("-matproduct_ab_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
      ierr = PetscOptionsEnd();CHKERRQ(ierr);
    }
  } else if (ptype == MATPRODUCT_PtAP) {
    if (product->api_user) {
      ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
      ierr = PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
      ierr = PetscOptionsEnd();CHKERRQ(ierr);
    } else {
      ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
      ierr = PetscOptionsBool("-matproduct_ptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
      ierr = PetscOptionsEnd();CHKERRQ(ierr);
    }
  }
  a    = (Mat_MPIAIJ*)A->data;
  p    = (Mat_MPIAIJ*)P->data;
  ierr = MatSetSizes(C,m,n,M,N);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(C->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(C->cmap);CHKERRQ(ierr);
  ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatGetOptionsPrefix(C,&prefix);CHKERRQ(ierr);
  switch (ptype) {
  case MATPRODUCT_AB: /* A * P */
    ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);

    if (mmdata->abmerge) { /* A_diag * P_loc and A_off * P_oth */
      /* P is product->B */
      ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
      ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else { /* A_diag * P_diag and A_diag * P_off and A_off * P_oth */
      ierr = MatProductCreate(a->A,p->A,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      rmapt[cp] = 1;
      cmapt[cp] = 1;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      ierr = MatProductCreate(a->A,p->B,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = p->garray;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    if (mmdata->P_oth) {
      ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr);
      ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
      ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
      ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
      ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
    /* A is product->B */
    ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
    if (A == P) {
      ierr = MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else {
      ierr = MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      ierr = MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      rmapt[cp] = 2;
      rmapa[cp] = p->garray;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  case MATPRODUCT_PtAP:
    ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
    /* P is product->B */
    ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
    ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
    ierr = MatProductSetType(mp[cp],MATPRODUCT_PtAP);CHKERRQ(ierr);
    ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
    ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
    ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
    ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
    mp[cp]->product->api_user = product->api_user;
    ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
    if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
    ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
    ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
    rmapt[cp] = 2;
    rmapa[cp] = globidx;
    cmapt[cp] = 2;
    cmapa[cp] = globidx;
    mptmp[cp] = PETSC_FALSE;
    cp++;
    if (mmdata->P_oth) {
      ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr);
      ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
      ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
      ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
      /* A_off * P_oth: temporary, consumed by the following AtB product */
      ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      mptmp[cp] = PETSC_TRUE;
      cp++;
      ierr = MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
  }
  /* sanity check */
  if (size > 1) for (i = 0; i < cp; i++) if (rmapt[i] == 2 && !hasoffproc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %D",i);

  ierr = PetscMalloc1(cp,&mmdata->mp);CHKERRQ(ierr);
  for (i = 0; i < cp; i++) mmdata->mp[i] = mp[i];
  ierr = PetscMalloc1(cp,&mmdata->mptmp);CHKERRQ(ierr);
  for (i = 0; i < cp; i++) mmdata->mptmp[i] = mptmp[i];
  mmdata->cp = cp;
  C->product->data       = mmdata;
  C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
  C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;

  /* memory type */
  mmdata->mtype = PETSC_MEMTYPE_HOST;
  ierr = PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");CHKERRQ(ierr);
  ierr = PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"");CHKERRQ(ierr);
  if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
  // TODO: enable the line below when MatSeqAIJCopySubArray_SeqAIJKokkos is implemented
  //else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_DEVICE;

  /* prepare coo coordinates for values insertion: first count how many entries
     are local (ncoo_d), owned but produced by an rmapt==2 product (ncoo_oown),
     or destined to other processes (ncoo_o) */
  for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
    if (mptmp[cp]) continue;
    if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes */
      const PetscInt *rmap = rmapa[cp];
      const PetscInt mr = mp[cp]->rmap->n;
      const PetscInt rs = C->rmap->rstart;
      const PetscInt re = C->rmap->rend;
      const PetscInt *ii = mm->i;
      for (i = 0; i < mr; i++) {
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i+1] - ii[i];
        if (gr < rs || gr >= re) ncoo_o += nz;
        else ncoo_oown += nz;
      }
    } else ncoo_d += mm->nz;
  }
  ierr = PetscCalloc1(mmdata->cp+1,&mmdata->off);CHKERRQ(ierr);
  ierr = PetscCalloc1(mmdata->cp+1,&mmdata->own);CHKERRQ(ierr);
  if (hasoffproc) { /* handle offproc values insertion */
    PetscSF  msf;
    PetscInt ncoo2,*coo_i2,*coo_j2;

    ierr = PetscMalloc1(ncoo_o,&mmdata->off[0]);CHKERRQ(ierr);
    ierr = PetscMalloc1(ncoo_oown,&mmdata->own[0]);CHKERRQ(ierr);
    ierr = PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j);CHKERRQ(ierr);
    /* fill the (i,j) coordinates of the off-process entries and record, per
       product, which value indices are off-process (off[]) or owned (own[]) */
    for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
      Mat_SeqAIJ *mm     = (Mat_SeqAIJ*)mp[cp]->data;
      PetscInt   *idxoff = mmdata->off[cp];
      PetscInt   *idxown = mmdata->own[cp];
      if (!mptmp[cp] && rmapt[cp] == 2) {
        const PetscInt *rmap = rmapa[cp];
        const PetscInt *cmap = cmapa[cp];
        const PetscInt *ii   = mm->i;
        PetscInt       *coi  = coo_i + ncoo_o;
        PetscInt       *coj  = coo_j + ncoo_o;
        const PetscInt mr = mp[cp]->rmap->n;
        const PetscInt rs = C->rmap->rstart;
        const PetscInt re = C->rmap->rend;
        const PetscInt cs = C->cmap->rstart;
        for (i = 0; i < mr; i++) {
          const PetscInt *jj = mm->j + ii[i];
          const PetscInt gr  = rmap[i];
          const PetscInt nz  = ii[i+1] - ii[i];
          if (gr < rs || gr >= re) { /* this row belongs to another process */
            for (j = ii[i]; j < ii[i+1]; j++) {
              *coi++    = gr;
              *idxoff++ = j;
            }
            if (!cmapt[cp]) { /* already global */
              for (j = 0; j < nz; j++) *coj++ = jj[j];
            } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
              for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
            } else { /* offdiag */
              for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
            }
            ncoo_o += nz;
          } else {
            for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
          }
        }
      }
      mmdata->off[cp + 1] = idxoff;
      mmdata->own[cp + 1] = idxown;
    }

    ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o,NULL,PETSC_OWN_POINTER,coo_i);CHKERRQ(ierr);
    ierr = PetscSFGetMultiSF(mmdata->sf,&msf);CHKERRQ(ierr);
    ierr = PetscSFGetGraph(msf,&ncoo2,NULL,NULL,NULL);CHKERRQ(ierr);
    ncoo = ncoo_d + ncoo_oown + ncoo2;
    ierr = PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2);CHKERRQ(ierr);
    /* move the off-process coordinates to the processes that own them */
    ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
    ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
    ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
    ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w);CHKERRQ(ierr);
    coo_i = coo_i2;
    coo_j = coo_j2;
  } else { /* no offproc values insertion */
    ncoo = ncoo_d;
    ierr = PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j);CHKERRQ(ierr);

    /* empty SF: still created so PetscSFMalloc/PetscSFFree have a valid object */
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFSetUp(mmdata->sf);CHKERRQ(ierr);
  }
  mmdata->hasoffproc = hasoffproc;

  /* on-process indices */
  for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ     *mm   = (Mat_SeqAIJ*)mp[cp]->data;
    PetscInt       *coi  = coo_i + ncoo_d;
    PetscInt       *coj  = coo_j + ncoo_d;
    const PetscInt *jj   = mm->j;
    const PetscInt *ii   = mm->i;
    const PetscInt *cmap = cmapa[cp];
    const PetscInt *rmap = rmapa[cp];
    const PetscInt mr = mp[cp]->rmap->n;
    const PetscInt rs = C->rmap->rstart;
    const PetscInt re = C->rmap->rend;
    const PetscInt cs = C->cmap->rstart;

    if (mptmp[cp]) continue;
    if (rmapt[cp] == 1) { /* consecutive rows: shift by the owned-row start */
      for (i = 0; i < mr; i++) {
        const PetscInt gr = i + rs;
        for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
      }
      /* columns coo */
      if (!cmapt[cp]) {
        ierr = PetscArraycpy(coj,jj,mm->nz);CHKERRQ(ierr);
      } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
        for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs;
      } else { /* offdiag */
        for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
      }
      ncoo_d += mm->nz;
    } else if (rmapt[cp] == 2) { /* mapped rows: keep only those owned locally */
      for (i = 0; i < mr; i++) {
        const PetscInt *jj = mm->j + ii[i];
        const PetscInt gr  = rmap[i];
        const PetscInt nz  = ii[i+1] - ii[i];
        if (gr >= rs && gr < re) {
          for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
          if (!cmapt[cp]) { /* already global */
            for (j = 0; j < nz; j++) *coj++ = jj[j];
          } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
            for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
          } else { /* offdiag */
            for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
          }
          ncoo_d += nz;
        }
      }
    }
  }
  if (glob) {
    ierr = ISRestoreIndices(glob,&globidx);CHKERRQ(ierr);
  }
  ierr = ISDestroy(&glob);CHKERRQ(ierr);
  if (P_oth_l2g) {
    ierr = ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
  }
  ierr = ISLocalToGlobalMappingDestroy(&P_oth_l2g);CHKERRQ(ierr);
  ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v);CHKERRQ(ierr);

  /* preallocate with COO data */
  ierr = MatSetPreallocationCOO(C,ncoo,coo_i,coo_j);CHKERRQ(ierr);
  ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/* Select the backend product implementation (MatProductSymbolic_MPIAIJBACKEND)
   for the AB, AtB and PtAP product types. Without PETSC_HAVE_DEVICE the
   backend is always eligible; with it, it is used only when A and B have the
   same type, neither is bound to the CPU, and the user did not request the
   CPU code path via the *_backend_cpu options. Otherwise this falls back to
   the plain MPIAIJ implementation. */
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product    *product = mat->product;
  PetscErrorCode ierr;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool      match  = PETSC_FALSE;
  PetscBool      usecpu = PETSC_FALSE;
#else
  PetscBool      match  = PETSC_TRUE;
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat,1);
#if defined(PETSC_HAVE_DEVICE)
  if (!product->A->boundtocpu && !product->B->boundtocpu) {
    ierr = PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match);CHKERRQ(ierr);
  }
  if (match) { /* we can always fallback to the CPU if requested */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
        ierr = PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
        ierr = PetscOptionsEnd();CHKERRQ(ierr);
      } else {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
        ierr = PetscOptionsBool("-matproduct_ab_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
        ierr = PetscOptionsEnd();CHKERRQ(ierr);
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr);
        ierr = PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
        ierr = PetscOptionsEnd();CHKERRQ(ierr);
      } else {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr);
        ierr = PetscOptionsBool("-matproduct_atb_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
        ierr = PetscOptionsEnd();CHKERRQ(ierr);
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
        ierr = PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr);
        ierr = PetscOptionsEnd();CHKERRQ(ierr);
      } else {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
        ierr = PetscOptionsBool("-matproduct_ptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr);
        ierr = PetscOptionsEnd();CHKERRQ(ierr);
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu;
  }
#endif
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) {
    ierr = MatProductSetFromOptions_MPIAIJ(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}