#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes:
   Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) {
    ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
  }
  if (a->B) {
    ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb,*aav,*bav;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia   = a->i;
  ib   = b->i;
  ierr = MatSeqAIJGetArrayRead(mat->A,&aav);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(mat->B,&bav);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = bav + ib[i];
    for (j=0;
j <nb; j++) { 110 if (bb[j] != 0.0) goto ok1; 111 } 112 cnt++; 113 ok1:; 114 } 115 ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 116 if (!n0rows) { 117 ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr); 118 ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr); 119 PetscFunctionReturn(0); 120 } 121 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 122 cnt = 0; 123 for (i=0; i<m; i++) { 124 na = ia[i+1] - ia[i]; 125 nb = ib[i+1] - ib[i]; 126 if (!na && !nb) continue; 127 aa = aav + ia[i]; 128 for (j=0; j<na;j++) { 129 if (aa[j] != 0.0) { 130 rows[cnt++] = rstart + i; 131 goto ok2; 132 } 133 } 134 bb = bav + ib[i]; 135 for (j=0; j<nb; j++) { 136 if (bb[j] != 0.0) { 137 rows[cnt++] = rstart + i; 138 goto ok2; 139 } 140 } 141 ok2:; 142 } 143 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 144 ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr); 145 ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr); 146 PetscFunctionReturn(0); 147 } 148 149 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 150 { 151 PetscErrorCode ierr; 152 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 153 PetscBool cong; 154 155 PetscFunctionBegin; 156 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 157 if (Y->assembled && cong) { 158 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 159 } else { 160 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 161 } 162 PetscFunctionReturn(0); 163 } 164 165 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 166 { 167 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 168 PetscErrorCode ierr; 169 PetscInt i,rstart,nrows,*rows; 170 171 PetscFunctionBegin; 172 *zrows = NULL; 173 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 174 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 175 for (i=0; i<nrows; i++) rows[i] += rstart; 176 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 177 PetscFunctionReturn(0); 178 } 179 180 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 181 { 182 PetscErrorCode ierr; 183 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 184 PetscInt i,n,*garray = aij->garray; 185 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 186 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 187 PetscReal *work; 188 const PetscScalar *dummy; 189 190 PetscFunctionBegin; 191 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 192 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 193 ierr = MatSeqAIJGetArrayRead(aij->A,&dummy);CHKERRQ(ierr); 194 ierr = MatSeqAIJRestoreArrayRead(aij->A,&dummy);CHKERRQ(ierr); 195 ierr = MatSeqAIJGetArrayRead(aij->B,&dummy);CHKERRQ(ierr); 196 ierr = MatSeqAIJRestoreArrayRead(aij->B,&dummy);CHKERRQ(ierr); 197 if (type == NORM_2) { 198 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 199 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 200 } 201 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 202 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 203 } 204 } else if (type == NORM_1) { 205 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 206 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 207 } 208 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 209 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 210 } 211 } else if (type == NORM_INFINITY) { 212 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 213 work[A->cmap->rstart + a_aij->j[i]] 
        = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }

  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  PetscErrorCode ierr;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Local utility routine that creates a mapping from the global column
    number to the local number in the off-diagonal part of the local
    storage of the matrix. When PETSC_USE_CTABLE is defined this is scalable,
    at a slightly higher hash-table lookup cost; without it, it is not scalable
    (each process stores an order-N integer array), but access is fast.
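
    As an illustrative sketch only (mirroring the lookup pattern used later in
    MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ(); gcol and lcol are hypothetical
    names for a global column index and the resulting local index, where a negative
    result means the column is not present in the off-diagonal block):

      #if defined(PETSC_USE_CTABLE)
        PetscTableFind(aij->colmap,gcol+1,&lcol); lcol--;
      #else
        lcol = aij->colmap[gcol] - 1;
      #endif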
270 */ 271 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 272 { 273 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 274 PetscErrorCode ierr; 275 PetscInt n = aij->B->cmap->n,i; 276 277 PetscFunctionBegin; 278 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 279 #if defined(PETSC_USE_CTABLE) 280 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 281 for (i=0; i<n; i++) { 282 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 283 } 284 #else 285 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 286 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 287 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 288 #endif 289 PetscFunctionReturn(0); 290 } 291 292 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 293 { \ 294 if (col <= lastcol1) low1 = 0; \ 295 else high1 = nrow1; \ 296 lastcol1 = col;\ 297 while (high1-low1 > 5) { \ 298 t = (low1+high1)/2; \ 299 if (rp1[t] > col) high1 = t; \ 300 else low1 = t; \ 301 } \ 302 for (_i=low1; _i<high1; _i++) { \ 303 if (rp1[_i] > col) break; \ 304 if (rp1[_i] == col) { \ 305 if (addv == ADD_VALUES) { \ 306 ap1[_i] += value; \ 307 /* Not sure LogFlops will slow dow the code or not */ \ 308 (void)PetscLogFlops(1.0); \ 309 } \ 310 else ap1[_i] = value; \ 311 inserted = PETSC_TRUE; \ 312 goto a_noinsert; \ 313 } \ 314 } \ 315 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 316 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 317 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 318 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 319 N = nrow1++ - 1; a->nz++; high1++; \ 320 /* shift up all the later entries in this row */ \ 321 ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\ 322 ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\ 323 rp1[_i] = col; \ 324 ap1[_i] = value; \ 325 A->nonzerostate++;\ 326 a_noinsert: ; \ 327 ailen[row] = nrow1; \ 328 } 329 330 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 331 { \ 332 if (col <= lastcol2) low2 = 0; \ 333 else high2 = nrow2; \ 334 lastcol2 = col; \ 335 while (high2-low2 > 5) { \ 336 t = (low2+high2)/2; \ 337 if (rp2[t] > col) high2 = t; \ 338 else low2 = t; \ 339 } \ 340 for (_i=low2; _i<high2; _i++) { \ 341 if (rp2[_i] > col) break; \ 342 if (rp2[_i] == col) { \ 343 if (addv == ADD_VALUES) { \ 344 ap2[_i] += value; \ 345 (void)PetscLogFlops(1.0); \ 346 } \ 347 else ap2[_i] = value; \ 348 inserted = PETSC_TRUE; \ 349 goto b_noinsert; \ 350 } \ 351 } \ 352 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 353 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 354 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 355 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 356 N = nrow2++ - 1; b->nz++; high2++; \ 357 /* shift up all the later entries in this row */ \ 358 ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\ 359 ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\ 360 rp2[_i] = col; \ 361 ap2[_i] = value; \ 362 B->nonzerostate++; \ 363 b_noinsert: ; \ 364 
bilen[row] = nrow2; \ 365 } 366 367 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 368 { 369 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 370 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 371 PetscErrorCode ierr; 372 PetscInt l,*garray = mat->garray,diag; 373 374 PetscFunctionBegin; 375 /* code only works for square matrices A */ 376 377 /* find size of row to the left of the diagonal part */ 378 ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr); 379 row = row - diag; 380 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 381 if (garray[b->j[b->i[row]+l]] > diag) break; 382 } 383 ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr); 384 385 /* diagonal part */ 386 ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr); 387 388 /* right of diagonal part */ 389 ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr); 390 #if defined(PETSC_HAVE_DEVICE) 391 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU; 392 #endif 393 PetscFunctionReturn(0); 394 } 395 396 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 397 { 398 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 399 PetscScalar value = 0.0; 400 PetscErrorCode ierr; 401 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 402 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 403 PetscBool roworiented = aij->roworiented; 404 405 /* Some Variables required in the macro */ 406 Mat A = aij->A; 407 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 408 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 409 PetscBool ignorezeroentries = a->ignorezeroentries; 410 Mat B = aij->B; 411 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 412 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 413 MatScalar *aa,*ba; 414 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 415 * cannot use "#if defined" inside a macro. 
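 * The MatSetValues_SeqAIJ_A_Private()/MatSetValues_SeqAIJ_B_Private() macros above set
 * inserted = PETSC_TRUE whenever a value is actually stored; under PETSC_HAVE_DEVICE the
 * code below then uses it to mark the corresponding block as modified on the CPU, as in
 *
 *   if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;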
*/ 416 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 417 418 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 419 PetscInt nonew; 420 MatScalar *ap1,*ap2; 421 422 PetscFunctionBegin; 423 #if defined(PETSC_HAVE_DEVICE) 424 if (A->offloadmask == PETSC_OFFLOAD_GPU) { 425 const PetscScalar *dummy; 426 ierr = MatSeqAIJGetArrayRead(A,&dummy);CHKERRQ(ierr); 427 ierr = MatSeqAIJRestoreArrayRead(A,&dummy);CHKERRQ(ierr); 428 } 429 if (B->offloadmask == PETSC_OFFLOAD_GPU) { 430 const PetscScalar *dummy; 431 ierr = MatSeqAIJGetArrayRead(B,&dummy);CHKERRQ(ierr); 432 ierr = MatSeqAIJRestoreArrayRead(B,&dummy);CHKERRQ(ierr); 433 } 434 #endif 435 aa = a->a; 436 ba = b->a; 437 for (i=0; i<m; i++) { 438 if (im[i] < 0) continue; 439 if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 440 if (im[i] >= rstart && im[i] < rend) { 441 row = im[i] - rstart; 442 lastcol1 = -1; 443 rp1 = aj + ai[row]; 444 ap1 = aa + ai[row]; 445 rmax1 = aimax[row]; 446 nrow1 = ailen[row]; 447 low1 = 0; 448 high1 = nrow1; 449 lastcol2 = -1; 450 rp2 = bj + bi[row]; 451 ap2 = ba + bi[row]; 452 rmax2 = bimax[row]; 453 nrow2 = bilen[row]; 454 low2 = 0; 455 high2 = nrow2; 456 457 for (j=0; j<n; j++) { 458 if (v) value = roworiented ? v[i*n+j] : v[i+j*m]; 459 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 460 if (in[j] >= cstart && in[j] < cend) { 461 col = in[j] - cstart; 462 nonew = a->nonew; 463 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 464 #if defined(PETSC_HAVE_DEVICE) 465 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 466 #endif 467 } else if (in[j] < 0) continue; 468 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 469 else { 470 if (mat->was_assembled) { 471 if (!aij->colmap) { 472 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 473 } 474 #if defined(PETSC_USE_CTABLE) 475 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 476 col--; 477 #else 478 col = aij->colmap[in[j]] - 1; 479 #endif 480 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 481 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 482 col = in[j]; 483 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 484 B = aij->B; 485 b = (Mat_SeqAIJ*)B->data; 486 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 487 rp2 = bj + bi[row]; 488 ap2 = ba + bi[row]; 489 rmax2 = bimax[row]; 490 nrow2 = bilen[row]; 491 low2 = 0; 492 high2 = nrow2; 493 bm = aij->B->rmap->n; 494 ba = b->a; 495 inserted = PETSC_FALSE; 496 } else if (col < 0) { 497 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 498 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 499 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 500 } 501 } else col = in[j]; 502 nonew = b->nonew; 503 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 504 #if defined(PETSC_HAVE_DEVICE) 505 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 506 #endif 507 } 508 } 509 } else { 510 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off 
process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 511 if (!aij->donotstash) { 512 mat->assembled = PETSC_FALSE; 513 if (roworiented) { 514 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 515 } else { 516 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 517 } 518 } 519 } 520 } 521 PetscFunctionReturn(0); 522 } 523 524 /* 525 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 526 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 527 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 528 */ 529 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[]) 530 { 531 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 532 Mat A = aij->A; /* diagonal part of the matrix */ 533 Mat B = aij->B; /* offdiagonal part of the matrix */ 534 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 535 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 536 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col; 537 PetscInt *ailen = a->ilen,*aj = a->j; 538 PetscInt *bilen = b->ilen,*bj = b->j; 539 PetscInt am = aij->A->rmap->n,j; 540 PetscInt diag_so_far = 0,dnz; 541 PetscInt offd_so_far = 0,onz; 542 543 PetscFunctionBegin; 544 /* Iterate over all rows of the matrix */ 545 for (j=0; j<am; j++) { 546 dnz = onz = 0; 547 /* Iterate over all non-zero columns of the current row */ 548 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 549 /* If column is in the diagonal */ 550 if (mat_j[col] >= cstart && mat_j[col] < cend) { 551 aj[diag_so_far++] = mat_j[col] - cstart; 552 dnz++; 553 } else { /* off-diagonal entries */ 554 bj[offd_so_far++] = mat_j[col]; 555 onz++; 556 } 557 } 558 ailen[j] = dnz; 559 bilen[j] = onz; 560 } 561 PetscFunctionReturn(0); 562 } 563 564 /* 565 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 566 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 567 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 568 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 569 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 570 */ 571 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[]) 572 { 573 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 574 Mat A = aij->A; /* diagonal part of the matrix */ 575 Mat B = aij->B; /* offdiagonal part of the matrix */ 576 Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data; 577 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 578 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 579 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 580 PetscInt *ailen = a->ilen,*aj = a->j; 581 PetscInt *bilen = b->ilen,*bj = b->j; 582 PetscInt am = aij->A->rmap->n,j; 583 PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. 
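       (They are the preallocated row-start offsets, i.e. the i arrays, of the diagonal and
       off-diagonal SeqAIJ blocks; below they provide rowstart_diag/rowstart_offd, the positions
       at which this routine starts writing the locally owned entries of each row.)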
*/ 584 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 585 PetscScalar *aa = a->a,*ba = b->a; 586 587 PetscFunctionBegin; 588 /* Iterate over all rows of the matrix */ 589 for (j=0; j<am; j++) { 590 dnz_row = onz_row = 0; 591 rowstart_offd = full_offd_i[j]; 592 rowstart_diag = full_diag_i[j]; 593 /* Iterate over all non-zero columns of the current row */ 594 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 595 /* If column is in the diagonal */ 596 if (mat_j[col] >= cstart && mat_j[col] < cend) { 597 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 598 aa[rowstart_diag+dnz_row] = mat_a[col]; 599 dnz_row++; 600 } else { /* off-diagonal entries */ 601 bj[rowstart_offd+onz_row] = mat_j[col]; 602 ba[rowstart_offd+onz_row] = mat_a[col]; 603 onz_row++; 604 } 605 } 606 ailen[j] = dnz_row; 607 bilen[j] = onz_row; 608 } 609 PetscFunctionReturn(0); 610 } 611 612 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 613 { 614 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 615 PetscErrorCode ierr; 616 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 617 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 618 619 PetscFunctionBegin; 620 for (i=0; i<m; i++) { 621 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 622 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 623 if (idxm[i] >= rstart && idxm[i] < rend) { 624 row = idxm[i] - rstart; 625 for (j=0; j<n; j++) { 626 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 627 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 628 if (idxn[j] >= cstart && idxn[j] < cend) { 629 col = idxn[j] - cstart; 630 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 631 } else { 632 if (!aij->colmap) { 633 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 634 } 635 #if defined(PETSC_USE_CTABLE) 636 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 637 col--; 638 #else 639 col = aij->colmap[idxn[j]] - 1; 640 #endif 641 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 642 else { 643 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 644 } 645 } 646 } 647 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 648 } 649 PetscFunctionReturn(0); 650 } 651 652 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 653 { 654 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 655 PetscErrorCode ierr; 656 PetscInt nstash,reallocs; 657 658 PetscFunctionBegin; 659 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 660 661 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 662 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 663 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 664 PetscFunctionReturn(0); 665 } 666 667 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 668 { 669 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 670 PetscErrorCode ierr; 671 PetscMPIInt n; 672 PetscInt i,j,rstart,ncols,flg; 673 PetscInt *row,*col; 674 PetscBool other_disassembled; 675 PetscScalar *val; 676 677 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be 
reset in disassembly */ 678 679 PetscFunctionBegin; 680 if (!aij->donotstash && !mat->nooffprocentries) { 681 while (1) { 682 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 683 if (!flg) break; 684 685 for (i=0; i<n;) { 686 /* Now identify the consecutive vals belonging to the same row */ 687 for (j=i,rstart=row[j]; j<n; j++) { 688 if (row[j] != rstart) break; 689 } 690 if (j < n) ncols = j-i; 691 else ncols = n-i; 692 /* Now assemble all these values with a single function call */ 693 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 694 i = j; 695 } 696 } 697 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 698 } 699 #if defined(PETSC_HAVE_DEVICE) 700 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 701 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 702 if (mat->boundtocpu) { 703 ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr); 704 ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr); 705 } 706 #endif 707 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 708 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 709 710 /* determine if any processor has disassembled, if so we must 711 also disassemble ourself, in order that we may reassemble. */ 712 /* 713 if nonzero structure of submatrix B cannot change then we know that 714 no processor disassembled thus we can skip this stuff 715 */ 716 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 717 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 718 if (mat->was_assembled && !other_disassembled) { 719 #if defined(PETSC_HAVE_DEVICE) 720 aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */ 721 #endif 722 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 723 } 724 } 725 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 726 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 727 } 728 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 729 #if defined(PETSC_HAVE_DEVICE) 730 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 731 #endif 732 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 733 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 734 735 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 736 737 aij->rowvalues = NULL; 738 739 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 740 741 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 742 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 743 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 744 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 745 } 746 #if defined(PETSC_HAVE_DEVICE) 747 mat->offloadmask = PETSC_OFFLOAD_BOTH; 748 #endif 749 PetscFunctionReturn(0); 750 } 751 752 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 753 { 754 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 755 PetscErrorCode ierr; 756 757 PetscFunctionBegin; 758 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 759 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 760 PetscFunctionReturn(0); 761 } 762 763 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt 
rows[],PetscScalar diag,Vec x,Vec b) 764 { 765 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 766 PetscObjectState sA, sB; 767 PetscInt *lrows; 768 PetscInt r, len; 769 PetscBool cong, lch, gch; 770 PetscErrorCode ierr; 771 772 PetscFunctionBegin; 773 /* get locally owned rows */ 774 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 775 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 776 /* fix right hand side if needed */ 777 if (x && b) { 778 const PetscScalar *xx; 779 PetscScalar *bb; 780 781 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 782 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 783 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 784 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 785 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 786 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 787 } 788 789 sA = mat->A->nonzerostate; 790 sB = mat->B->nonzerostate; 791 792 if (diag != 0.0 && cong) { 793 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 794 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 795 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 796 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 797 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 798 PetscInt nnwA, nnwB; 799 PetscBool nnzA, nnzB; 800 801 nnwA = aijA->nonew; 802 nnwB = aijB->nonew; 803 nnzA = aijA->keepnonzeropattern; 804 nnzB = aijB->keepnonzeropattern; 805 if (!nnzA) { 806 ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr); 807 aijA->nonew = 0; 808 } 809 if (!nnzB) { 810 ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr); 811 aijB->nonew = 0; 812 } 813 /* Must zero here before the next loop */ 814 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 815 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 816 for (r = 0; r < len; ++r) { 817 const PetscInt row = lrows[r] + A->rmap->rstart; 818 if (row >= A->cmap->N) continue; 819 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 820 } 821 aijA->nonew = nnwA; 822 aijB->nonew = nnwB; 823 } else { 824 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 825 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 826 } 827 ierr = PetscFree(lrows);CHKERRQ(ierr); 828 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 829 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 830 831 /* reduce nonzerostate */ 832 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 833 ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 834 if (gch) A->nonzerostate++; 835 PetscFunctionReturn(0); 836 } 837 838 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 839 { 840 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 841 PetscErrorCode ierr; 842 PetscMPIInt n = A->rmap->n; 843 PetscInt i,j,r,m,len = 0; 844 PetscInt *lrows,*owners = A->rmap->range; 845 PetscMPIInt p = 0; 846 PetscSFNode *rrows; 847 PetscSF sf; 848 const PetscScalar *xx; 849 PetscScalar *bb,*mask; 850 Vec xmask,lmask; 851 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 852 const PetscInt *aj, 
*ii,*ridx; 853 PetscScalar *aa; 854 855 PetscFunctionBegin; 856 /* Create SF where leaves are input rows and roots are owned rows */ 857 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 858 for (r = 0; r < n; ++r) lrows[r] = -1; 859 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 860 for (r = 0; r < N; ++r) { 861 const PetscInt idx = rows[r]; 862 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 863 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 864 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 865 } 866 rrows[r].rank = p; 867 rrows[r].index = rows[r] - owners[p]; 868 } 869 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 870 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 871 /* Collect flags for rows to be zeroed */ 872 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 873 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 874 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 875 /* Compress and put in row numbers */ 876 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 877 /* zero diagonal part of matrix */ 878 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 879 /* handle off diagonal part of matrix */ 880 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 881 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 882 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 883 for (i=0; i<len; i++) bb[lrows[i]] = 1; 884 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 885 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 886 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 887 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 888 if (x && b) { /* this code is buggy when the row and column layout don't match */ 889 PetscBool cong; 890 891 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 892 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 893 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 894 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 895 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 896 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 897 } 898 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 899 /* remove zeroed rows of off diagonal matrix */ 900 ii = aij->i; 901 for (i=0; i<len; i++) { 902 ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr); 903 } 904 /* loop over all elements of off process part of matrix zeroing removed columns*/ 905 if (aij->compressedrow.use) { 906 m = aij->compressedrow.nrows; 907 ii = aij->compressedrow.i; 908 ridx = aij->compressedrow.rindex; 909 for (i=0; i<m; i++) { 910 n = ii[i+1] - ii[i]; 911 aj = aij->j + ii[i]; 912 aa = aij->a + ii[i]; 913 914 for (j=0; j<n; j++) { 915 if (PetscAbsScalar(mask[*aj])) { 916 if (b) bb[*ridx] -= *aa*xx[*aj]; 917 *aa = 0.0; 918 } 919 aa++; 920 aj++; 921 } 922 ridx++; 923 } 924 } else { /* do not use compressed row format */ 925 m = l->B->rmap->n; 926 for (i=0; i<m; i++) { 927 n = ii[i+1] - ii[i]; 928 aj = aij->j + ii[i]; 929 aa = aij->a + ii[i]; 930 for (j=0; j<n; j++) { 931 if (PetscAbsScalar(mask[*aj])) { 932 if (b) bb[i] -= *aa*xx[*aj]; 933 *aa = 0.0; 934 } 935 aa++; 936 aj++; 937 } 938 } 
939 } 940 if (x && b) { 941 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 942 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 943 } 944 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 945 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 946 ierr = PetscFree(lrows);CHKERRQ(ierr); 947 948 /* only change matrix nonzero state if pattern was allowed to be changed */ 949 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 950 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 951 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 952 } 953 PetscFunctionReturn(0); 954 } 955 956 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 957 { 958 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 959 PetscErrorCode ierr; 960 PetscInt nt; 961 VecScatter Mvctx = a->Mvctx; 962 963 PetscFunctionBegin; 964 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 965 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 966 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 967 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 968 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 969 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 970 PetscFunctionReturn(0); 971 } 972 973 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 974 { 975 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 976 PetscErrorCode ierr; 977 978 PetscFunctionBegin; 979 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 980 PetscFunctionReturn(0); 981 } 982 983 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 984 { 985 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 986 PetscErrorCode ierr; 987 VecScatter Mvctx = a->Mvctx; 988 989 PetscFunctionBegin; 990 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 991 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 992 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 993 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 994 PetscFunctionReturn(0); 995 } 996 997 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 998 { 999 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1000 PetscErrorCode ierr; 1001 1002 PetscFunctionBegin; 1003 /* do nondiagonal part */ 1004 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1005 /* do local part */ 1006 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1007 /* add partial results together */ 1008 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1009 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1010 PetscFunctionReturn(0); 1011 } 1012 1013 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1014 { 1015 MPI_Comm comm; 1016 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1017 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1018 IS Me,Notme; 1019 PetscErrorCode ierr; 1020 PetscInt M,N,first,last,*notme,i; 1021 PetscBool lf; 1022 PetscMPIInt size; 1023 1024 PetscFunctionBegin; 1025 /* Easy test: symmetric diagonal block */ 1026 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1027 ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 1028 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr); 1029 if (!*f) PetscFunctionReturn(0); 1030 ierr = 
PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1031 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 1032 if (size == 1) PetscFunctionReturn(0); 1033 1034 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1035 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1036 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1037 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1038 for (i=0; i<first; i++) notme[i] = i; 1039 for (i=last; i<M; i++) notme[i-last+first] = i; 1040 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1041 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1042 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1043 Aoff = Aoffs[0]; 1044 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1045 Boff = Boffs[0]; 1046 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1047 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1048 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1049 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1050 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1051 ierr = PetscFree(notme);CHKERRQ(ierr); 1052 PetscFunctionReturn(0); 1053 } 1054 1055 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1056 { 1057 PetscErrorCode ierr; 1058 1059 PetscFunctionBegin; 1060 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1061 PetscFunctionReturn(0); 1062 } 1063 1064 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1065 { 1066 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1067 PetscErrorCode ierr; 1068 1069 PetscFunctionBegin; 1070 /* do nondiagonal part */ 1071 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1072 /* do local part */ 1073 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1074 /* add partial results together */ 1075 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1076 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1077 PetscFunctionReturn(0); 1078 } 1079 1080 /* 1081 This only works correctly for square matrices where the subblock A->A is the 1082 diagonal block 1083 */ 1084 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1085 { 1086 PetscErrorCode ierr; 1087 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1088 1089 PetscFunctionBegin; 1090 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1091 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1092 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1093 PetscFunctionReturn(0); 1094 } 1095 1096 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1097 { 1098 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1099 PetscErrorCode ierr; 1100 1101 PetscFunctionBegin; 1102 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1103 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1104 PetscFunctionReturn(0); 1105 } 1106 1107 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1108 { 1109 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1110 PetscErrorCode ierr; 1111 1112 PetscFunctionBegin; 1113 #if defined(PETSC_USE_LOG) 1114 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1115 #endif 1116 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1117 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1118 ierr = 
MatDestroy(&aij->A);CHKERRQ(ierr); 1119 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1120 #if defined(PETSC_USE_CTABLE) 1121 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1122 #else 1123 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1124 #endif 1125 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1126 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1127 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1128 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1129 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1130 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1131 1132 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 1133 ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr); 1134 1135 ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr); 1136 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1137 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1138 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1139 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1140 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1141 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1142 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1143 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr); 1144 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1145 #if defined(PETSC_HAVE_CUDA) 1146 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr); 1147 #endif 1148 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 1149 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr); 1150 #endif 1151 #if defined(PETSC_HAVE_ELEMENTAL) 1152 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1153 #endif 1154 #if defined(PETSC_HAVE_SCALAPACK) 1155 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr); 1156 #endif 1157 #if defined(PETSC_HAVE_HYPRE) 1158 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1159 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1160 #endif 1161 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1162 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr); 1163 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1164 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr); 1165 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr); 1166 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr); 1167 #if defined(PETSC_HAVE_MKL_SPARSE) 1168 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr); 1169 #endif 1170 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr); 1171 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1172 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr); 1173 PetscFunctionReturn(0); 1174 } 1175 1176 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1177 { 1178 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1179 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1180 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1181 const PetscInt *garray = aij->garray; 1182 const PetscScalar *aa,*ba; 1183 PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb; 1184 PetscInt *rowlens; 1185 PetscInt *colidxs; 1186 PetscScalar *matvals; 1187 PetscErrorCode ierr; 1188 1189 PetscFunctionBegin; 1190 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1191 1192 M = mat->rmap->N; 1193 N = mat->cmap->N; 1194 m = mat->rmap->n; 1195 rs = mat->rmap->rstart; 1196 cs = mat->cmap->rstart; 1197 nz = A->nz + B->nz; 1198 1199 /* write matrix header */ 1200 header[0] = MAT_FILE_CLASSID; 1201 header[1] = M; header[2] = N; header[3] = nz; 1202 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1203 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1204 1205 /* fill in and store row lengths */ 1206 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1207 for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1208 ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr); 1209 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1210 1211 /* fill in and store column indices */ 1212 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1213 for (cnt=0, i=0; i<m; i++) { 1214 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1215 if (garray[B->j[jb]] > cs) break; 1216 colidxs[cnt++] = garray[B->j[jb]]; 1217 } 1218 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1219 colidxs[cnt++] = A->j[ja] + cs; 1220 for (; jb<B->i[i+1]; jb++) 1221 colidxs[cnt++] = garray[B->j[jb]]; 1222 } 1223 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1224 ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 1225 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1226 1227 /* fill in and store nonzero values */ 1228 ierr = MatSeqAIJGetArrayRead(aij->A,&aa);CHKERRQ(ierr); 1229 ierr = MatSeqAIJGetArrayRead(aij->B,&ba);CHKERRQ(ierr); 1230 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1231 for (cnt=0, i=0; i<m; i++) { 1232 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1233 if (garray[B->j[jb]] > cs) break; 1234 matvals[cnt++] = ba[jb]; 1235 } 1236 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1237 matvals[cnt++] = aa[ja]; 1238 for (; jb<B->i[i+1]; jb++) 1239 matvals[cnt++] = ba[jb]; 1240 } 1241 ierr = MatSeqAIJRestoreArrayRead(aij->A,&aa);CHKERRQ(ierr); 1242 ierr = MatSeqAIJRestoreArrayRead(aij->B,&ba);CHKERRQ(ierr); 1243 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1244 ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 1245 ierr = PetscFree(matvals);CHKERRQ(ierr); 1246 1247 /* write block size option to the viewer's .info file */ 1248 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1249 PetscFunctionReturn(0); 1250 } 1251 1252 #include <petscdraw.h> 1253 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 
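/* Handles the ASCII (info, detailed info, load balance), binary, draw, and socket viewers; for the
   remaining formats the whole matrix is gathered onto rank 0 with MatCreateSubMatrix() and viewed
   there with MatView_SeqAIJ() through a sub-viewer */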
1254 { 1255 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1256 PetscErrorCode ierr; 1257 PetscMPIInt rank = aij->rank,size = aij->size; 1258 PetscBool isdraw,iascii,isbinary; 1259 PetscViewer sviewer; 1260 PetscViewerFormat format; 1261 1262 PetscFunctionBegin; 1263 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1264 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1265 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1266 if (iascii) { 1267 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1268 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1269 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1270 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1271 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1272 for (i=0; i<(PetscInt)size; i++) { 1273 nmax = PetscMax(nmax,nz[i]); 1274 nmin = PetscMin(nmin,nz[i]); 1275 navg += nz[i]; 1276 } 1277 ierr = PetscFree(nz);CHKERRQ(ierr); 1278 navg = navg/size; 1279 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1280 PetscFunctionReturn(0); 1281 } 1282 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1283 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1284 MatInfo info; 1285 PetscBool inodes; 1286 1287 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr); 1288 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1289 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1290 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1291 if (!inodes) { 1292 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1293 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1294 } else { 1295 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1296 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1297 } 1298 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1299 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1300 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1301 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1302 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1303 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1304 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1305 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1306 PetscFunctionReturn(0); 1307 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1308 PetscInt inodecount,inodelimit,*inodes; 1309 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1310 if (inodes) { 1311 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1312 } else { 1313 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1314 } 1315 
PetscFunctionReturn(0); 1316 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1317 PetscFunctionReturn(0); 1318 } 1319 } else if (isbinary) { 1320 if (size == 1) { 1321 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1322 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1323 } else { 1324 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1325 } 1326 PetscFunctionReturn(0); 1327 } else if (iascii && size == 1) { 1328 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1329 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1330 PetscFunctionReturn(0); 1331 } else if (isdraw) { 1332 PetscDraw draw; 1333 PetscBool isnull; 1334 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1335 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1336 if (isnull) PetscFunctionReturn(0); 1337 } 1338 1339 { /* assemble the entire matrix onto first processor */ 1340 Mat A = NULL, Av; 1341 IS isrow,iscol; 1342 1343 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1344 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1345 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1346 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1347 /* The commented code uses MatCreateSubMatrices instead */ 1348 /* 1349 Mat *AA, A = NULL, Av; 1350 IS isrow,iscol; 1351 1352 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1353 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1354 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1355 if (!rank) { 1356 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1357 A = AA[0]; 1358 Av = AA[0]; 1359 } 1360 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1361 */ 1362 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1363 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1364 /* 1365 Everyone has to call to draw the matrix since the graphics waits are 1366 synchronized across all processors that share the PetscDraw object 1367 */ 1368 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1369 if (!rank) { 1370 if (((PetscObject)mat)->name) { 1371 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1372 } 1373 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1374 } 1375 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1376 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1377 ierr = MatDestroy(&A);CHKERRQ(ierr); 1378 } 1379 PetscFunctionReturn(0); 1380 } 1381 1382 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1383 { 1384 PetscErrorCode ierr; 1385 PetscBool iascii,isdraw,issocket,isbinary; 1386 1387 PetscFunctionBegin; 1388 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1389 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1390 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1391 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1392 if (iascii || isdraw || isbinary || issocket) { 1393 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1394 } 1395 PetscFunctionReturn(0); 1396 } 1397 1398 PetscErrorCode MatSOR_MPIAIJ(Mat 
matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1399 { 1400 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1401 PetscErrorCode ierr; 1402 Vec bb1 = NULL; 1403 PetscBool hasop; 1404 1405 PetscFunctionBegin; 1406 if (flag == SOR_APPLY_UPPER) { 1407 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1408 PetscFunctionReturn(0); 1409 } 1410 1411 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1412 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1413 } 1414 1415 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1416 if (flag & SOR_ZERO_INITIAL_GUESS) { 1417 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1418 its--; 1419 } 1420 1421 while (its--) { 1422 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1423 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1424 1425 /* update rhs: bb1 = bb - B*x */ 1426 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1427 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1428 1429 /* local sweep */ 1430 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1431 } 1432 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1433 if (flag & SOR_ZERO_INITIAL_GUESS) { 1434 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1435 its--; 1436 } 1437 while (its--) { 1438 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1439 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1440 1441 /* update rhs: bb1 = bb - B*x */ 1442 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1443 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1444 1445 /* local sweep */ 1446 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1447 } 1448 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1449 if (flag & SOR_ZERO_INITIAL_GUESS) { 1450 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1451 its--; 1452 } 1453 while (its--) { 1454 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1455 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1456 1457 /* update rhs: bb1 = bb - B*x */ 1458 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1459 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1460 1461 /* local sweep */ 1462 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1463 } 1464 } else if (flag & SOR_EISENSTAT) { 1465 Vec xx1; 1466 1467 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1468 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1469 1470 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1471 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1472 if (!mat->diag) { 1473 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1474 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1475 } 1476 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1477 if (hasop) { 1478 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1479 } else { 1480 ierr = 
VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1481 } 1482 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1483 1484 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1485 1486 /* local sweep */ 1487 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1488 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1489 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1490 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1491 1492 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1493 1494 matin->factorerrortype = mat->A->factorerrortype; 1495 PetscFunctionReturn(0); 1496 } 1497 1498 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1499 { 1500 Mat aA,aB,Aperm; 1501 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1502 PetscScalar *aa,*ba; 1503 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1504 PetscSF rowsf,sf; 1505 IS parcolp = NULL; 1506 PetscBool done; 1507 PetscErrorCode ierr; 1508 1509 PetscFunctionBegin; 1510 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1511 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1512 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1513 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1514 1515 /* Invert row permutation to find out where my rows should go */ 1516 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1517 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1518 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1519 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1520 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1521 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1522 1523 /* Invert column permutation to find out where my columns should go */ 1524 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1525 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1526 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1527 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1528 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1529 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1530 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1531 1532 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1533 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1534 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1535 1536 /* Find out where my gcols should go */ 1537 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1538 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1539 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1540 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1541 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1542 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1543 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1544 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1545 1546 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1547 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1548 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1549 for (i=0; i<m; i++) { 1550 PetscInt row = rdest[i]; 1551 PetscMPIInt rowner; 1552 ierr = 
PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1553 for (j=ai[i]; j<ai[i+1]; j++) { 1554 PetscInt col = cdest[aj[j]]; 1555 PetscMPIInt cowner; 1556 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1557 if (rowner == cowner) dnnz[i]++; 1558 else onnz[i]++; 1559 } 1560 for (j=bi[i]; j<bi[i+1]; j++) { 1561 PetscInt col = gcdest[bj[j]]; 1562 PetscMPIInt cowner; 1563 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1564 if (rowner == cowner) dnnz[i]++; 1565 else onnz[i]++; 1566 } 1567 } 1568 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1569 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1570 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1571 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1572 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1573 1574 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1575 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1576 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1577 for (i=0; i<m; i++) { 1578 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1579 PetscInt j0,rowlen; 1580 rowlen = ai[i+1] - ai[i]; 1581 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1582 for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1583 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1584 } 1585 rowlen = bi[i+1] - bi[i]; 1586 for (j0=j=0; j<rowlen; j0=j) { 1587 for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1588 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1589 } 1590 } 1591 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1592 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1593 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1594 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1595 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1596 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1597 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1598 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1599 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1600 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1601 *B = Aperm; 1602 PetscFunctionReturn(0); 1603 } 1604 1605 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1606 { 1607 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1608 PetscErrorCode ierr; 1609 1610 PetscFunctionBegin; 1611 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1612 if (ghosts) *ghosts = aij->garray; 1613 PetscFunctionReturn(0); 1614 } 1615 1616 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1617 { 1618 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1619 Mat A = mat->A,B = mat->B; 1620 PetscErrorCode ierr; 1621 PetscLogDouble isend[5],irecv[5]; 1622 1623 PetscFunctionBegin; 1624 info->block_size = 1.0; 1625 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1626 1627 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1628 isend[3] = info->memory; isend[4] = info->mallocs; 1629 1630 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1631 1632 isend[0] += info->nz_used; isend[1] += 
info->nz_allocated; isend[2] += info->nz_unneeded; 1633 isend[3] += info->memory; isend[4] += info->mallocs; 1634 if (flag == MAT_LOCAL) { 1635 info->nz_used = isend[0]; 1636 info->nz_allocated = isend[1]; 1637 info->nz_unneeded = isend[2]; 1638 info->memory = isend[3]; 1639 info->mallocs = isend[4]; 1640 } else if (flag == MAT_GLOBAL_MAX) { 1641 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1642 1643 info->nz_used = irecv[0]; 1644 info->nz_allocated = irecv[1]; 1645 info->nz_unneeded = irecv[2]; 1646 info->memory = irecv[3]; 1647 info->mallocs = irecv[4]; 1648 } else if (flag == MAT_GLOBAL_SUM) { 1649 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1650 1651 info->nz_used = irecv[0]; 1652 info->nz_allocated = irecv[1]; 1653 info->nz_unneeded = irecv[2]; 1654 info->memory = irecv[3]; 1655 info->mallocs = irecv[4]; 1656 } 1657 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1658 info->fill_ratio_needed = 0; 1659 info->factor_mallocs = 0; 1660 PetscFunctionReturn(0); 1661 } 1662 1663 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1664 { 1665 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1666 PetscErrorCode ierr; 1667 1668 PetscFunctionBegin; 1669 switch (op) { 1670 case MAT_NEW_NONZERO_LOCATIONS: 1671 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1672 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1673 case MAT_KEEP_NONZERO_PATTERN: 1674 case MAT_NEW_NONZERO_LOCATION_ERR: 1675 case MAT_USE_INODES: 1676 case MAT_IGNORE_ZERO_ENTRIES: 1677 MatCheckPreallocated(A,1); 1678 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1679 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1680 break; 1681 case MAT_ROW_ORIENTED: 1682 MatCheckPreallocated(A,1); 1683 a->roworiented = flg; 1684 1685 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1686 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1687 break; 1688 case MAT_FORCE_DIAGONAL_ENTRIES: 1689 case MAT_SORTED_FULL: 1690 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1691 break; 1692 case MAT_IGNORE_OFF_PROC_ENTRIES: 1693 a->donotstash = flg; 1694 break; 1695 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1696 case MAT_SPD: 1697 case MAT_SYMMETRIC: 1698 case MAT_STRUCTURALLY_SYMMETRIC: 1699 case MAT_HERMITIAN: 1700 case MAT_SYMMETRY_ETERNAL: 1701 break; 1702 case MAT_SUBMAT_SINGLEIS: 1703 A->submat_singleis = flg; 1704 break; 1705 case MAT_STRUCTURE_ONLY: 1706 /* The option is handled directly by MatSetOption() */ 1707 break; 1708 default: 1709 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1710 } 1711 PetscFunctionReturn(0); 1712 } 1713 1714 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1715 { 1716 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1717 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1718 PetscErrorCode ierr; 1719 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1720 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1721 PetscInt *cmap,*idx_p; 1722 1723 PetscFunctionBegin; 1724 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1725 mat->getrowactive = PETSC_TRUE; 1726 1727 if (!mat->rowvalues && (idx || v)) { 1728 /* 1729 allocate enough space to hold information from the longest row. 
1730 */ 1731 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1732 PetscInt max = 1,tmp; 1733 for (i=0; i<matin->rmap->n; i++) { 1734 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1735 if (max < tmp) max = tmp; 1736 } 1737 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1738 } 1739 1740 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1741 lrow = row - rstart; 1742 1743 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1744 if (!v) {pvA = NULL; pvB = NULL;} 1745 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1746 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1747 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1748 nztot = nzA + nzB; 1749 1750 cmap = mat->garray; 1751 if (v || idx) { 1752 if (nztot) { 1753 /* Sort by increasing column numbers, assuming A and B already sorted */ 1754 PetscInt imark = -1; 1755 if (v) { 1756 *v = v_p = mat->rowvalues; 1757 for (i=0; i<nzB; i++) { 1758 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1759 else break; 1760 } 1761 imark = i; 1762 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1763 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1764 } 1765 if (idx) { 1766 *idx = idx_p = mat->rowindices; 1767 if (imark > -1) { 1768 for (i=0; i<imark; i++) { 1769 idx_p[i] = cmap[cworkB[i]]; 1770 } 1771 } else { 1772 for (i=0; i<nzB; i++) { 1773 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1774 else break; 1775 } 1776 imark = i; 1777 } 1778 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1779 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1780 } 1781 } else { 1782 if (idx) *idx = NULL; 1783 if (v) *v = NULL; 1784 } 1785 } 1786 *nz = nztot; 1787 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1788 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1789 PetscFunctionReturn(0); 1790 } 1791 1792 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1793 { 1794 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1795 1796 PetscFunctionBegin; 1797 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1798 aij->getrowactive = PETSC_FALSE; 1799 PetscFunctionReturn(0); 1800 } 1801 1802 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1803 { 1804 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1805 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1806 PetscErrorCode ierr; 1807 PetscInt i,j,cstart = mat->cmap->rstart; 1808 PetscReal sum = 0.0; 1809 MatScalar *v; 1810 1811 PetscFunctionBegin; 1812 if (aij->size == 1) { 1813 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1814 } else { 1815 if (type == NORM_FROBENIUS) { 1816 v = amat->a; 1817 for (i=0; i<amat->nz; i++) { 1818 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1819 } 1820 v = bmat->a; 1821 for (i=0; i<bmat->nz; i++) { 1822 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1823 } 1824 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1825 *norm = PetscSqrtReal(*norm); 1826 ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr); 1827 } else if (type == NORM_1) { /* max column norm */ 1828 PetscReal *tmp,*tmp2; 1829 PetscInt *jj,*garray = aij->garray; 1830 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1831 ierr = 
PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1832 *norm = 0.0; 1833 v = amat->a; jj = amat->j; 1834 for (j=0; j<amat->nz; j++) { 1835 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1836 } 1837 v = bmat->a; jj = bmat->j; 1838 for (j=0; j<bmat->nz; j++) { 1839 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1840 } 1841 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1842 for (j=0; j<mat->cmap->N; j++) { 1843 if (tmp2[j] > *norm) *norm = tmp2[j]; 1844 } 1845 ierr = PetscFree(tmp);CHKERRQ(ierr); 1846 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1847 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1848 } else if (type == NORM_INFINITY) { /* max row norm */ 1849 PetscReal ntemp = 0.0; 1850 for (j=0; j<aij->A->rmap->n; j++) { 1851 v = amat->a + amat->i[j]; 1852 sum = 0.0; 1853 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1854 sum += PetscAbsScalar(*v); v++; 1855 } 1856 v = bmat->a + bmat->i[j]; 1857 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1858 sum += PetscAbsScalar(*v); v++; 1859 } 1860 if (sum > ntemp) ntemp = sum; 1861 } 1862 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1863 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1864 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1865 } 1866 PetscFunctionReturn(0); 1867 } 1868 1869 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1870 { 1871 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 1872 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 1873 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 1874 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 1875 PetscErrorCode ierr; 1876 Mat B,A_diag,*B_diag; 1877 const MatScalar *pbv,*bv; 1878 1879 PetscFunctionBegin; 1880 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1881 ai = Aloc->i; aj = Aloc->j; 1882 bi = Bloc->i; bj = Bloc->j; 1883 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1884 PetscInt *d_nnz,*g_nnz,*o_nnz; 1885 PetscSFNode *oloc; 1886 PETSC_UNUSED PetscSF sf; 1887 1888 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1889 /* compute d_nnz for preallocation */ 1890 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 1891 for (i=0; i<ai[ma]; i++) { 1892 d_nnz[aj[i]]++; 1893 } 1894 /* compute local off-diagonal contributions */ 1895 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 1896 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1897 /* map those to global */ 1898 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1899 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1900 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1901 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 1902 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1903 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1904 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1905 1906 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1907 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1908 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1909 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1910 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1911 ierr = 
PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1912 } else { 1913 B = *matout; 1914 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1915 } 1916 1917 b = (Mat_MPIAIJ*)B->data; 1918 A_diag = a->A; 1919 B_diag = &b->A; 1920 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 1921 A_diag_ncol = A_diag->cmap->N; 1922 B_diag_ilen = sub_B_diag->ilen; 1923 B_diag_i = sub_B_diag->i; 1924 1925 /* Set ilen for diagonal of B */ 1926 for (i=0; i<A_diag_ncol; i++) { 1927 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 1928 } 1929 1930 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 1931 very quickly (=without using MatSetValues), because all writes are local. */ 1932 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 1933 1934 /* copy over the B part */ 1935 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 1936 ierr = MatSeqAIJGetArrayRead(a->B,&bv);CHKERRQ(ierr); 1937 pbv = bv; 1938 row = A->rmap->rstart; 1939 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1940 cols_tmp = cols; 1941 for (i=0; i<mb; i++) { 1942 ncol = bi[i+1]-bi[i]; 1943 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES);CHKERRQ(ierr); 1944 row++; 1945 pbv += ncol; cols_tmp += ncol; 1946 } 1947 ierr = PetscFree(cols);CHKERRQ(ierr); 1948 ierr = MatSeqAIJRestoreArrayRead(a->B,&bv);CHKERRQ(ierr); 1949 1950 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1951 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1952 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1953 *matout = B; 1954 } else { 1955 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 1956 } 1957 PetscFunctionReturn(0); 1958 } 1959 1960 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1961 { 1962 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1963 Mat a = aij->A,b = aij->B; 1964 PetscErrorCode ierr; 1965 PetscInt s1,s2,s3; 1966 1967 PetscFunctionBegin; 1968 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1969 if (rr) { 1970 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1971 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1972 /* Overlap communication with computation. 
*/ 1973 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1974 } 1975 if (ll) { 1976 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1977 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1978 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1979 } 1980 /* scale the diagonal block */ 1981 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1982 1983 if (rr) { 1984 /* Do a scatter end and then right scale the off-diagonal block */ 1985 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1986 ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr); 1987 } 1988 PetscFunctionReturn(0); 1989 } 1990 1991 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1992 { 1993 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1994 PetscErrorCode ierr; 1995 1996 PetscFunctionBegin; 1997 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1998 PetscFunctionReturn(0); 1999 } 2000 2001 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2002 { 2003 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2004 Mat a,b,c,d; 2005 PetscBool flg; 2006 PetscErrorCode ierr; 2007 2008 PetscFunctionBegin; 2009 a = matA->A; b = matA->B; 2010 c = matB->A; d = matB->B; 2011 2012 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2013 if (flg) { 2014 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2015 } 2016 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2017 PetscFunctionReturn(0); 2018 } 2019 2020 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2021 { 2022 PetscErrorCode ierr; 2023 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2024 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2025 2026 PetscFunctionBegin; 2027 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2028 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2029 /* because of the column compression in the off-processor part of the matrix a->B, 2030 the number of columns in a->B and b->B may be different, hence we cannot call 2031 the MatCopy() directly on the two parts. If need be, we can provide a more 2032 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2033 then copying the submatrices */ 2034 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2035 } else { 2036 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2037 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2038 } 2039 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2040 PetscFunctionReturn(0); 2041 } 2042 2043 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2044 { 2045 PetscErrorCode ierr; 2046 2047 PetscFunctionBegin; 2048 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr); 2049 PetscFunctionReturn(0); 2050 } 2051 2052 /* 2053 Computes the number of nonzeros per row needed for preallocation when X and Y 2054 have different nonzero structure. 
2055 */ 2056 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2057 { 2058 PetscInt i,j,k,nzx,nzy; 2059 2060 PetscFunctionBegin; 2061 /* Set the number of nonzeros in the new matrix */ 2062 for (i=0; i<m; i++) { 2063 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2064 nzx = xi[i+1] - xi[i]; 2065 nzy = yi[i+1] - yi[i]; 2066 nnz[i] = 0; 2067 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2068 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2069 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2070 nnz[i]++; 2071 } 2072 for (; k<nzy; k++) nnz[i]++; 2073 } 2074 PetscFunctionReturn(0); 2075 } 2076 2077 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2078 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2079 { 2080 PetscErrorCode ierr; 2081 PetscInt m = Y->rmap->N; 2082 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2083 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2084 2085 PetscFunctionBegin; 2086 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2087 PetscFunctionReturn(0); 2088 } 2089 2090 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2091 { 2092 PetscErrorCode ierr; 2093 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2094 2095 PetscFunctionBegin; 2096 if (str == SAME_NONZERO_PATTERN) { 2097 ierr = MatAXPY(yy->A,a,xx->A,str);CHKERRQ(ierr); 2098 ierr = MatAXPY(yy->B,a,xx->B,str);CHKERRQ(ierr); 2099 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2100 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2101 } else { 2102 Mat B; 2103 PetscInt *nnz_d,*nnz_o; 2104 2105 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2106 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2107 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2108 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2109 ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr); 2110 ierr = MatSetType(B,((PetscObject)Y)->type_name);CHKERRQ(ierr); 2111 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2112 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2113 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2114 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2115 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2116 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2117 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2118 } 2119 PetscFunctionReturn(0); 2120 } 2121 2122 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2123 2124 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2125 { 2126 #if defined(PETSC_USE_COMPLEX) 2127 PetscErrorCode ierr; 2128 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2129 2130 PetscFunctionBegin; 2131 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2132 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2133 #else 2134 PetscFunctionBegin; 2135 #endif 2136 PetscFunctionReturn(0); 2137 } 2138 2139 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2140 { 2141 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2142 PetscErrorCode ierr; 2143 2144 PetscFunctionBegin; 2145 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2146 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2147 
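  /* The real part is taken blockwise: the diagonal block a->A and the off-diagonal block a->B
     together hold every stored value of the parallel matrix, so nothing else needs updating here. */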
PetscFunctionReturn(0); 2148 } 2149 2150 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2151 { 2152 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2153 PetscErrorCode ierr; 2154 2155 PetscFunctionBegin; 2156 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2157 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2158 PetscFunctionReturn(0); 2159 } 2160 2161 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2162 { 2163 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2164 PetscErrorCode ierr; 2165 PetscInt i,*idxb = NULL,m = A->rmap->n; 2166 PetscScalar *va,*vv; 2167 Vec vB,vA; 2168 const PetscScalar *vb; 2169 2170 PetscFunctionBegin; 2171 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr); 2172 ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr); 2173 2174 ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr); 2175 if (idx) { 2176 for (i=0; i<m; i++) { 2177 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2178 } 2179 } 2180 2181 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr); 2182 ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr); 2183 ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr); 2184 2185 ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr); 2186 ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr); 2187 for (i=0; i<m; i++) { 2188 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2189 vv[i] = vb[i]; 2190 if (idx) idx[i] = a->garray[idxb[i]]; 2191 } else { 2192 vv[i] = va[i]; 2193 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) 2194 idx[i] = a->garray[idxb[i]]; 2195 } 2196 } 2197 ierr = VecRestoreArrayWrite(v,&vv);CHKERRQ(ierr); 2198 ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr); 2199 ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr); 2200 ierr = PetscFree(idxb);CHKERRQ(ierr); 2201 ierr = VecDestroy(&vA);CHKERRQ(ierr); 2202 ierr = VecDestroy(&vB);CHKERRQ(ierr); 2203 PetscFunctionReturn(0); 2204 } 2205 2206 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2207 { 2208 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2209 PetscInt m = A->rmap->n,n = A->cmap->n; 2210 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2211 PetscInt *cmap = mat->garray; 2212 PetscInt *diagIdx, *offdiagIdx; 2213 Vec diagV, offdiagV; 2214 PetscScalar *a, *diagA, *offdiagA; 2215 const PetscScalar *ba,*bav; 2216 PetscInt r,j,col,ncols,*bi,*bj; 2217 PetscErrorCode ierr; 2218 Mat B = mat->B; 2219 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2220 2221 PetscFunctionBegin; 2222 /* When a process holds entire A and other processes have no entry */ 2223 if (A->cmap->N == n) { 2224 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2225 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2226 ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr); 2227 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2228 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2229 PetscFunctionReturn(0); 2230 } else if (n == 0) { 2231 if (m) { 2232 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2233 for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;} 2234 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2235 } 2236 PetscFunctionReturn(0); 2237 } 2238 2239 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2240 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2241 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2242 ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2243 2244 /* Get offdiagIdx[] for implicit 0.0 */ 2245 ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr); 2246 ba = bav; 2247 bi = b->i; 2248 bj = b->j;
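  /* The loop below handles the off-diagonal block row by row.  B stores only the nonzero off-process
     columns in compressed form, with cmap[] (= garray) mapping the compressed column numbers back to
     global columns.  A row of B that is not dense (ncols < A->cmap->N - n) contains at least one
     implicit 0.0; the global column of the first such zero is located by searching for the first
     "hole" in cmap[], and the explicit entries are then compared against that candidate to obtain the
     row's minimum absolute value and the column where it occurs. */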
2249 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2250 for (r = 0; r < m; r++) { 2251 ncols = bi[r+1] - bi[r]; 2252 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2253 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2254 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2255 offdiagA[r] = 0.0; 2256 2257 /* Find first hole in the cmap */ 2258 for (j=0; j<ncols; j++) { 2259 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2260 if (col > j && j < cstart) { 2261 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2262 break; 2263 } else if (col > j + n && j >= cstart) { 2264 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2265 break; 2266 } 2267 } 2268 if (j == ncols && ncols < A->cmap->N - n) { 2269 /* a hole is outside compressed Bcols */ 2270 if (ncols == 0) { 2271 if (cstart) { 2272 offdiagIdx[r] = 0; 2273 } else offdiagIdx[r] = cend; 2274 } else { /* ncols > 0 */ 2275 offdiagIdx[r] = cmap[ncols-1] + 1; 2276 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2277 } 2278 } 2279 } 2280 2281 for (j=0; j<ncols; j++) { 2282 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2283 ba++; bj++; 2284 } 2285 } 2286 2287 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2288 ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2289 for (r = 0; r < m; ++r) { 2290 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2291 a[r] = diagA[r]; 2292 if (idx) idx[r] = cstart + diagIdx[r]; 2293 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2294 a[r] = diagA[r]; 2295 if (idx) { 2296 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2297 idx[r] = cstart + diagIdx[r]; 2298 } else idx[r] = offdiagIdx[r]; 2299 } 2300 } else { 2301 a[r] = offdiagA[r]; 2302 if (idx) idx[r] = offdiagIdx[r]; 2303 } 2304 } 2305 ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr); 2306 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2307 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2308 ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2309 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2310 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2311 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2312 PetscFunctionReturn(0); 2313 } 2314 2315 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2316 { 2317 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2318 PetscInt m = A->rmap->n,n = A->cmap->n; 2319 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2320 PetscInt *cmap = mat->garray; 2321 PetscInt *diagIdx, *offdiagIdx; 2322 Vec diagV, offdiagV; 2323 PetscScalar *a, *diagA, *offdiagA; 2324 const PetscScalar *ba,*bav; 2325 PetscInt r,j,col,ncols,*bi,*bj; 2326 PetscErrorCode ierr; 2327 Mat B = mat->B; 2328 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2329 2330 PetscFunctionBegin; 2331 /* When a process holds entire A and other processes have no entry */ 2332 if (A->cmap->N == n) { 2333 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2334 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2335 ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr); 2336 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2337 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2338 PetscFunctionReturn(0); 2339 } else if (n == 0) { 2340 if (m) { 2341 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2342 for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;} 2343 ierr = 
VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2344 } 2345 PetscFunctionReturn(0); 2346 } 2347 2348 ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2349 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2350 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2351 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2352 2353 /* Get offdiagIdx[] for implicit 0.0 */ 2354 ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr); 2355 ba = bav; 2356 bi = b->i; 2357 bj = b->j; 2358 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2359 for (r = 0; r < m; r++) { 2360 ncols = bi[r+1] - bi[r]; 2361 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2362 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2363 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2364 offdiagA[r] = 0.0; 2365 2366 /* Find first hole in the cmap */ 2367 for (j=0; j<ncols; j++) { 2368 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2369 if (col > j && j < cstart) { 2370 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2371 break; 2372 } else if (col > j + n && j >= cstart) { 2373 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2374 break; 2375 } 2376 } 2377 if (j == ncols && ncols < A->cmap->N - n) { 2378 /* a hole is outside compressed Bcols */ 2379 if (ncols == 0) { 2380 if (cstart) { 2381 offdiagIdx[r] = 0; 2382 } else offdiagIdx[r] = cend; 2383 } else { /* ncols > 0 */ 2384 offdiagIdx[r] = cmap[ncols-1] + 1; 2385 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2386 } 2387 } 2388 } 2389 2390 for (j=0; j<ncols; j++) { 2391 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2392 ba++; bj++; 2393 } 2394 } 2395 2396 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2397 ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2398 for (r = 0; r < m; ++r) { 2399 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2400 a[r] = diagA[r]; 2401 if (idx) idx[r] = cstart + diagIdx[r]; 2402 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2403 a[r] = diagA[r]; 2404 if (idx) { 2405 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2406 idx[r] = cstart + diagIdx[r]; 2407 } else idx[r] = offdiagIdx[r]; 2408 } 2409 } else { 2410 a[r] = offdiagA[r]; 2411 if (idx) idx[r] = offdiagIdx[r]; 2412 } 2413 } 2414 ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr); 2415 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2416 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2417 ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2418 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2419 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2420 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2421 PetscFunctionReturn(0); 2422 } 2423 2424 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2425 { 2426 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 2427 PetscInt m = A->rmap->n,n = A->cmap->n; 2428 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2429 PetscInt *cmap = mat->garray; 2430 PetscInt *diagIdx, *offdiagIdx; 2431 Vec diagV, offdiagV; 2432 PetscScalar *a, *diagA, *offdiagA; 2433 const PetscScalar *ba,*bav; 2434 PetscInt r,j,col,ncols,*bi,*bj; 2435 PetscErrorCode ierr; 2436 Mat B = mat->B; 2437 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2438 2439 PetscFunctionBegin; 2440 /* When a process holds entire A and other processes have no entry */ 2441 if (A->cmap->N == n) { 2442 ierr = 
VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2443 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2444 ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr); 2445 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2446 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2447 PetscFunctionReturn(0); 2448 } else if (n == 0) { 2449 if (m) { 2450 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2451 for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;} 2452 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2453 } 2454 PetscFunctionReturn(0); 2455 } 2456 2457 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2458 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2459 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2460 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2461 2462 /* Get offdiagIdx[] for implicit 0.0 */ 2463 ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr); 2464 ba = bav; 2465 bi = b->i; 2466 bj = b->j; 2467 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2468 for (r = 0; r < m; r++) { 2469 ncols = bi[r+1] - bi[r]; 2470 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2471 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2472 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2473 offdiagA[r] = 0.0; 2474 2475 /* Find first hole in the cmap */ 2476 for (j=0; j<ncols; j++) { 2477 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2478 if (col > j && j < cstart) { 2479 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2480 break; 2481 } else if (col > j + n && j >= cstart) { 2482 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2483 break; 2484 } 2485 } 2486 if (j == ncols && ncols < A->cmap->N - n) { 2487 /* a hole is outside compressed Bcols */ 2488 if (ncols == 0) { 2489 if (cstart) { 2490 offdiagIdx[r] = 0; 2491 } else offdiagIdx[r] = cend; 2492 } else { /* ncols > 0 */ 2493 offdiagIdx[r] = cmap[ncols-1] + 1; 2494 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2495 } 2496 } 2497 } 2498 2499 for (j=0; j<ncols; j++) { 2500 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2501 ba++; bj++; 2502 } 2503 } 2504 2505 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2506 ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr); 2507 for (r = 0; r < m; ++r) { 2508 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2509 a[r] = diagA[r]; 2510 if (idx) idx[r] = cstart + diagIdx[r]; 2511 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2512 a[r] = diagA[r]; 2513 if (idx) { 2514 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2515 idx[r] = cstart + diagIdx[r]; 2516 } else idx[r] = offdiagIdx[r]; 2517 } 2518 } else { 2519 a[r] = offdiagA[r]; 2520 if (idx) idx[r] = offdiagIdx[r]; 2521 } 2522 } 2523 ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr); 2524 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2525 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2526 ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr); 2527 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2528 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2529 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2530 PetscFunctionReturn(0); 2531 } 2532 2533 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2534 { 2535 PetscErrorCode ierr; 2536 Mat *dummy; 2537 2538 PetscFunctionBegin; 2539 ierr = 
MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2540 *newmat = *dummy; 2541 ierr = PetscFree(dummy);CHKERRQ(ierr); 2542 PetscFunctionReturn(0); 2543 } 2544 2545 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2546 { 2547 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2548 PetscErrorCode ierr; 2549 2550 PetscFunctionBegin; 2551 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2552 A->factorerrortype = a->A->factorerrortype; 2553 PetscFunctionReturn(0); 2554 } 2555 2556 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2557 { 2558 PetscErrorCode ierr; 2559 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2560 2561 PetscFunctionBegin; 2562 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2563 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2564 if (x->assembled) { 2565 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2566 } else { 2567 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2568 } 2569 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2570 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2571 PetscFunctionReturn(0); 2572 } 2573 2574 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2575 { 2576 PetscFunctionBegin; 2577 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2578 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2579 PetscFunctionReturn(0); 2580 } 2581 2582 /*@ 2583 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2584 2585 Collective on Mat 2586 2587 Input Parameters: 2588 + A - the matrix 2589 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2590 2591 Level: advanced 2592 2593 @*/ 2594 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2595 { 2596 PetscErrorCode ierr; 2597 2598 PetscFunctionBegin; 2599 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2600 PetscFunctionReturn(0); 2601 } 2602 2603 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2604 { 2605 PetscErrorCode ierr; 2606 PetscBool sc = PETSC_FALSE,flg; 2607 2608 PetscFunctionBegin; 2609 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2610 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2611 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2612 if (flg) { 2613 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2614 } 2615 ierr = PetscOptionsTail();CHKERRQ(ierr); 2616 PetscFunctionReturn(0); 2617 } 2618 2619 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2620 { 2621 PetscErrorCode ierr; 2622 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2623 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2624 2625 PetscFunctionBegin; 2626 if (!Y->preallocated) { 2627 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2628 } else if (!aij->nz) { 2629 PetscInt nonew = aij->nonew; 2630 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2631 aij->nonew = nonew; 2632 } 2633 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2634 
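  /* MatShift_Basic() adds the shift to each local diagonal entry with MatSetValues() and then
     reassembles the matrix; the preallocation fix-ups above ensure that a matrix with no
     preallocation (or no nonzeros yet) has room for one diagonal entry per local row first. */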
PetscFunctionReturn(0); 2635 } 2636 2637 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2638 { 2639 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2640 PetscErrorCode ierr; 2641 2642 PetscFunctionBegin; 2643 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2644 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2645 if (d) { 2646 PetscInt rstart; 2647 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2648 *d += rstart; 2649 2650 } 2651 PetscFunctionReturn(0); 2652 } 2653 2654 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2655 { 2656 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2657 PetscErrorCode ierr; 2658 2659 PetscFunctionBegin; 2660 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2661 PetscFunctionReturn(0); 2662 } 2663 2664 /* -------------------------------------------------------------------*/ 2665 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2666 MatGetRow_MPIAIJ, 2667 MatRestoreRow_MPIAIJ, 2668 MatMult_MPIAIJ, 2669 /* 4*/ MatMultAdd_MPIAIJ, 2670 MatMultTranspose_MPIAIJ, 2671 MatMultTransposeAdd_MPIAIJ, 2672 NULL, 2673 NULL, 2674 NULL, 2675 /*10*/ NULL, 2676 NULL, 2677 NULL, 2678 MatSOR_MPIAIJ, 2679 MatTranspose_MPIAIJ, 2680 /*15*/ MatGetInfo_MPIAIJ, 2681 MatEqual_MPIAIJ, 2682 MatGetDiagonal_MPIAIJ, 2683 MatDiagonalScale_MPIAIJ, 2684 MatNorm_MPIAIJ, 2685 /*20*/ MatAssemblyBegin_MPIAIJ, 2686 MatAssemblyEnd_MPIAIJ, 2687 MatSetOption_MPIAIJ, 2688 MatZeroEntries_MPIAIJ, 2689 /*24*/ MatZeroRows_MPIAIJ, 2690 NULL, 2691 NULL, 2692 NULL, 2693 NULL, 2694 /*29*/ MatSetUp_MPIAIJ, 2695 NULL, 2696 NULL, 2697 MatGetDiagonalBlock_MPIAIJ, 2698 NULL, 2699 /*34*/ MatDuplicate_MPIAIJ, 2700 NULL, 2701 NULL, 2702 NULL, 2703 NULL, 2704 /*39*/ MatAXPY_MPIAIJ, 2705 MatCreateSubMatrices_MPIAIJ, 2706 MatIncreaseOverlap_MPIAIJ, 2707 MatGetValues_MPIAIJ, 2708 MatCopy_MPIAIJ, 2709 /*44*/ MatGetRowMax_MPIAIJ, 2710 MatScale_MPIAIJ, 2711 MatShift_MPIAIJ, 2712 MatDiagonalSet_MPIAIJ, 2713 MatZeroRowsColumns_MPIAIJ, 2714 /*49*/ MatSetRandom_MPIAIJ, 2715 NULL, 2716 NULL, 2717 NULL, 2718 NULL, 2719 /*54*/ MatFDColoringCreate_MPIXAIJ, 2720 NULL, 2721 MatSetUnfactored_MPIAIJ, 2722 MatPermute_MPIAIJ, 2723 NULL, 2724 /*59*/ MatCreateSubMatrix_MPIAIJ, 2725 MatDestroy_MPIAIJ, 2726 MatView_MPIAIJ, 2727 NULL, 2728 NULL, 2729 /*64*/ NULL, 2730 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2731 NULL, 2732 NULL, 2733 NULL, 2734 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2735 MatGetRowMinAbs_MPIAIJ, 2736 NULL, 2737 NULL, 2738 NULL, 2739 NULL, 2740 /*75*/ MatFDColoringApply_AIJ, 2741 MatSetFromOptions_MPIAIJ, 2742 NULL, 2743 NULL, 2744 MatFindZeroDiagonals_MPIAIJ, 2745 /*80*/ NULL, 2746 NULL, 2747 NULL, 2748 /*83*/ MatLoad_MPIAIJ, 2749 MatIsSymmetric_MPIAIJ, 2750 NULL, 2751 NULL, 2752 NULL, 2753 NULL, 2754 /*89*/ NULL, 2755 NULL, 2756 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2757 NULL, 2758 NULL, 2759 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2760 NULL, 2761 NULL, 2762 NULL, 2763 MatBindToCPU_MPIAIJ, 2764 /*99*/ MatProductSetFromOptions_MPIAIJ, 2765 NULL, 2766 NULL, 2767 MatConjugate_MPIAIJ, 2768 NULL, 2769 /*104*/MatSetValuesRow_MPIAIJ, 2770 MatRealPart_MPIAIJ, 2771 MatImaginaryPart_MPIAIJ, 2772 NULL, 2773 NULL, 2774 /*109*/NULL, 2775 NULL, 2776 MatGetRowMin_MPIAIJ, 2777 NULL, 2778 MatMissingDiagonal_MPIAIJ, 2779 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2780 NULL, 2781 MatGetGhosts_MPIAIJ, 2782 NULL, 2783 NULL, 2784 /*119*/MatMultDiagonalBlock_MPIAIJ, 2785 
NULL, 2786 NULL, 2787 NULL, 2788 MatGetMultiProcBlock_MPIAIJ, 2789 /*124*/MatFindNonzeroRows_MPIAIJ, 2790 MatGetColumnNorms_MPIAIJ, 2791 MatInvertBlockDiagonal_MPIAIJ, 2792 MatInvertVariableBlockDiagonal_MPIAIJ, 2793 MatCreateSubMatricesMPI_MPIAIJ, 2794 /*129*/NULL, 2795 NULL, 2796 NULL, 2797 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2798 NULL, 2799 /*134*/NULL, 2800 NULL, 2801 NULL, 2802 NULL, 2803 NULL, 2804 /*139*/MatSetBlockSizes_MPIAIJ, 2805 NULL, 2806 NULL, 2807 MatFDColoringSetUp_MPIXAIJ, 2808 MatFindOffBlockDiagonalEntries_MPIAIJ, 2809 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2810 /*145*/NULL, 2811 NULL, 2812 NULL 2813 }; 2814 2815 /* ----------------------------------------------------------------------------------------*/ 2816 2817 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2818 { 2819 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2820 PetscErrorCode ierr; 2821 2822 PetscFunctionBegin; 2823 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2824 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2825 PetscFunctionReturn(0); 2826 } 2827 2828 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2829 { 2830 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2831 PetscErrorCode ierr; 2832 2833 PetscFunctionBegin; 2834 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2835 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2836 PetscFunctionReturn(0); 2837 } 2838 2839 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2840 { 2841 Mat_MPIAIJ *b; 2842 PetscErrorCode ierr; 2843 PetscMPIInt size; 2844 2845 PetscFunctionBegin; 2846 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2847 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2848 b = (Mat_MPIAIJ*)B->data; 2849 2850 #if defined(PETSC_USE_CTABLE) 2851 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2852 #else 2853 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2854 #endif 2855 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2856 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2857 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2858 2859 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2860 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 2861 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2862 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2863 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 2864 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2865 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2866 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2867 2868 if (!B->preallocated) { 2869 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2870 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2871 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2872 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2873 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2874 } 2875 2876 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2877 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2878 B->preallocated = PETSC_TRUE; 2879 B->was_assembled = PETSC_FALSE; 2880 B->assembled = PETSC_FALSE; 2881 PetscFunctionReturn(0); 2882 } 2883 2884 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2885 { 2886 Mat_MPIAIJ *b; 2887 PetscErrorCode ierr; 2888 2889 PetscFunctionBegin; 2890 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2891 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2892 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2893 b = (Mat_MPIAIJ*)B->data; 2894 2895 #if defined(PETSC_USE_CTABLE) 2896 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2897 #else 2898 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2899 #endif 2900 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2901 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2902 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2903 2904 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2905 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2906 B->preallocated = PETSC_TRUE; 2907 B->was_assembled = PETSC_FALSE; 2908 B->assembled = PETSC_FALSE; 2909 PetscFunctionReturn(0); 2910 } 2911 2912 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2913 { 2914 Mat mat; 2915 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2916 PetscErrorCode ierr; 2917 2918 PetscFunctionBegin; 2919 *newmat = NULL; 2920 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2921 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2922 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2923 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2924 a = (Mat_MPIAIJ*)mat->data; 2925 2926 mat->factortype = matin->factortype; 2927 mat->assembled = matin->assembled; 2928 mat->insertmode = NOT_SET_VALUES; 2929 mat->preallocated = matin->preallocated; 2930 2931 a->size = oldmat->size; 2932 a->rank = oldmat->rank; 2933 a->donotstash = oldmat->donotstash; 2934 a->roworiented = oldmat->roworiented; 2935 a->rowindices = NULL; 2936 a->rowvalues = NULL; 2937 a->getrowactive = PETSC_FALSE; 2938 2939 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2940 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2941 2942 if (oldmat->colmap) { 2943 #if defined(PETSC_USE_CTABLE) 2944 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2945 #else 2946 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2947 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2948 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 2949 #endif 2950 } else a->colmap = NULL; 2951 if (oldmat->garray) { 2952 PetscInt len; 2953 len = oldmat->B->cmap->n; 2954 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2955 
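     /* garray[] translates the compressed column numbers of the off-diagonal block B into global
        column indices; copying it (below) lets the duplicate reuse the same column compression as
        the original matrix. */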
ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2956 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 2957 } else a->garray = NULL; 2958 2959 /* It may happen MatDuplicate is called with a non-assembled matrix 2960 In fact, MatDuplicate only requires the matrix to be preallocated 2961 This may happen inside a DMCreateMatrix_Shell */ 2962 if (oldmat->lvec) { 2963 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2964 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2965 } 2966 if (oldmat->Mvctx) { 2967 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2968 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2969 } 2970 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2971 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2972 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2973 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2974 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2975 *newmat = mat; 2976 PetscFunctionReturn(0); 2977 } 2978 2979 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2980 { 2981 PetscBool isbinary, ishdf5; 2982 PetscErrorCode ierr; 2983 2984 PetscFunctionBegin; 2985 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2986 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2987 /* force binary viewer to load .info file if it has not yet done so */ 2988 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2989 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2990 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2991 if (isbinary) { 2992 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 2993 } else if (ishdf5) { 2994 #if defined(PETSC_HAVE_HDF5) 2995 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 2996 #else 2997 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2998 #endif 2999 } else { 3000 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 3001 } 3002 PetscFunctionReturn(0); 3003 } 3004 3005 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3006 { 3007 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 3008 PetscInt *rowidxs,*colidxs; 3009 PetscScalar *matvals; 3010 PetscErrorCode ierr; 3011 3012 PetscFunctionBegin; 3013 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3014 3015 /* read in matrix header */ 3016 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3017 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3018 M = header[1]; N = header[2]; nz = header[3]; 3019 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 3020 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N); 3021 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 3022 3023 /* set block sizes from the viewer's .info file */ 3024 ierr = 
MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3025 /* set global sizes if not set already */ 3026 if (mat->rmap->N < 0) mat->rmap->N = M; 3027 if (mat->cmap->N < 0) mat->cmap->N = N; 3028 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3029 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3030 3031 /* check if the matrix sizes are correct */ 3032 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3033 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 3034 3035 /* read in row lengths and build row indices */ 3036 ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr); 3037 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3038 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 3039 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3040 ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr); 3041 if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 3042 /* read in column indices and matrix values */ 3043 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 3044 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 3045 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 3046 /* store matrix indices and values */ 3047 ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr); 3048 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3049 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3050 PetscFunctionReturn(0); 3051 } 3052 3053 /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 3054 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3055 { 3056 PetscErrorCode ierr; 3057 IS iscol_local; 3058 PetscBool isstride; 3059 PetscMPIInt lisstride=0,gisstride; 3060 3061 PetscFunctionBegin; 3062 /* check if we are grabbing all columns*/ 3063 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3064 3065 if (isstride) { 3066 PetscInt start,len,mstart,mlen; 3067 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3068 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3069 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3070 if (mstart == start && mlen-mstart == len) lisstride = 1; 3071 } 3072 3073 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3074 if (gisstride) { 3075 PetscInt N; 3076 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3077 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr); 3078 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3079 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3080 } else { 3081 PetscInt cbs; 3082 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3083 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3084 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3085 } 3086 3087 *isseq = iscol_local; 3088 PetscFunctionReturn(0); 3089 } 3090 3091 /* 3092 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3093 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3094 3095 Input Parameters: 3096 mat - matrix 3097 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3098 i.e., mat->rstart <= isrow[i] < mat->rend 3099 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3100 i.e., mat->cstart <= iscol[i] < mat->cend 3101 Output Parameter: 3102 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3103 iscol_o - sequential column index set for retrieving mat->B 3104 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3105 */ 3106 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3107 { 3108 PetscErrorCode ierr; 3109 Vec x,cmap; 3110 const PetscInt *is_idx; 3111 PetscScalar *xarray,*cmaparray; 3112 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3113 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3114 Mat B=a->B; 3115 Vec lvec=a->lvec,lcmap; 3116 PetscInt i,cstart,cend,Bn=B->cmap->N; 3117 MPI_Comm comm; 3118 VecScatter Mvctx=a->Mvctx; 3119 3120 PetscFunctionBegin; 3121 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3122 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3123 3124 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3125 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3126 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3127 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3128 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3129 3130 /* Get start indices */ 3131 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3132 isstart -= ncols; 3133 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3134 3135 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3136 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3137 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3138 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3139 for (i=0; i<ncols; i++) { 3140 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3141 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3142 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3143 } 3144 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3145 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3146 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3147 3148 /* Get iscol_d */ 3149 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3150 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3151 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3152 3153 /* Get isrow_d */ 3154 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3155 rstart = mat->rmap->rstart; 3156 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3157 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3158 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3159 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3160 3161 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3162 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3163 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3164 3165 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3166 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3167 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3168 3169 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3170 3171 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3172 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3173 3174 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3175 /* off-process column indices */ 3176 count = 0; 3177 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3178 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3179 3180 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3181 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3182 for (i=0; i<Bn; i++) { 3183 if (PetscRealPart(xarray[i]) > -1.0) { 3184 idx[count] = i; /* local column index in off-diagonal part B */ 3185 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3186 count++; 3187 } 3188 } 3189 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3190 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3191 3192 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3193 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3194 3195 ierr = PetscFree(idx);CHKERRQ(ierr); 3196 *garray = cmap1; 3197 3198 ierr = VecDestroy(&x);CHKERRQ(ierr); 3199 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3200 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3201 PetscFunctionReturn(0); 3202 } 3203 3204 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3205 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3206 { 3207 PetscErrorCode ierr; 3208 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3209 Mat M = NULL; 3210 MPI_Comm comm; 3211 IS iscol_d,isrow_d,iscol_o; 3212 Mat Asub = NULL,Bsub = NULL; 3213 PetscInt n; 3214 3215 PetscFunctionBegin; 3216 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3217 3218 if (call == MAT_REUSE_MATRIX) { 3219 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3220 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3221 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3222 3223 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3224 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3225 3226 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3227 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3228 3229 /* Update diagonal and off-diagonal portions of submat */ 3230 asub = (Mat_MPIAIJ*)(*submat)->data; 3231 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3232 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3233 if (n) { 3234 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3235 } 3236 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3237 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3238 3239 } else { /* call == MAT_INITIAL_MATRIX) */ 3240 const PetscInt *garray; 3241 PetscInt BsubN; 3242 3243 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3244 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3245 3246 /* Create local submatrices Asub and Bsub */ 3247 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3248 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3249 3250 /* Create submatrix M */ 3251 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3252 3253 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3254 asub = (Mat_MPIAIJ*)M->data; 3255 3256 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3257 n = asub->B->cmap->N; 3258 if (BsubN > n) { 3259 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3260 const PetscInt *idx; 3261 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3262 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3263 3264 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3265 j = 0; 3266 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3267 for (i=0; i<n; i++) { 3268 if (j >= BsubN) break; 3269 while (subgarray[i] > garray[j]) j++; 3270 3271 if (subgarray[i] == garray[j]) { 3272 idx_new[i] = idx[j++]; 3273 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3274 } 3275 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3276 3277 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3278 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3279 3280 } else if (BsubN < n) { 3281 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3282 } 3283 3284 ierr = PetscFree(garray);CHKERRQ(ierr); 3285 *submat = M; 3286 3287 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3288 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3289 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3290 3291 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3292 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3293 3294 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3295 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3296 } 3297 PetscFunctionReturn(0); 3298 } 3299 3300 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3301 { 3302 PetscErrorCode ierr; 3303 IS iscol_local=NULL,isrow_d; 3304 PetscInt csize; 3305 PetscInt n,i,j,start,end; 3306 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3307 MPI_Comm comm; 3308 3309 PetscFunctionBegin; 3310 /* If isrow has same processor distribution as mat, 3311 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3312 if (call == MAT_REUSE_MATRIX) { 3313 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3314 if (isrow_d) { 3315 sameRowDist = PETSC_TRUE; 3316 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3317 } else { 3318 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3319 if (iscol_local) { 3320 sameRowDist = PETSC_TRUE; 3321 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3322 } 3323 } 3324 } else { 3325 /* Check if isrow has same processor distribution as mat */ 3326 sameDist[0] = 
PETSC_FALSE; 3327 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3328 if (!n) { 3329 sameDist[0] = PETSC_TRUE; 3330 } else { 3331 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3332 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3333 if (i >= start && j < end) { 3334 sameDist[0] = PETSC_TRUE; 3335 } 3336 } 3337 3338 /* Check if iscol has same processor distribution as mat */ 3339 sameDist[1] = PETSC_FALSE; 3340 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3341 if (!n) { 3342 sameDist[1] = PETSC_TRUE; 3343 } else { 3344 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3345 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3346 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3347 } 3348 3349 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3350 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3351 sameRowDist = tsameDist[0]; 3352 } 3353 3354 if (sameRowDist) { 3355 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3356 /* isrow and iscol have same processor distribution as mat */ 3357 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3358 PetscFunctionReturn(0); 3359 } else { /* sameRowDist */ 3360 /* isrow has same processor distribution as mat */ 3361 if (call == MAT_INITIAL_MATRIX) { 3362 PetscBool sorted; 3363 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3364 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3365 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3366 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3367 3368 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3369 if (sorted) { 3370 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3371 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3372 PetscFunctionReturn(0); 3373 } 3374 } else { /* call == MAT_REUSE_MATRIX */ 3375 IS iscol_sub; 3376 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3377 if (iscol_sub) { 3378 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3379 PetscFunctionReturn(0); 3380 } 3381 } 3382 } 3383 } 3384 3385 /* General case: iscol -> iscol_local which has global size of iscol */ 3386 if (call == MAT_REUSE_MATRIX) { 3387 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3388 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3389 } else { 3390 if (!iscol_local) { 3391 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3392 } 3393 } 3394 3395 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3396 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3397 3398 if (call == MAT_INITIAL_MATRIX) { 3399 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3400 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3401 } 3402 PetscFunctionReturn(0); 3403 } 3404 3405 /*@C 3406 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3407 and "off-diagonal" part of the matrix in CSR format. 3408 3409 Collective 3410 3411 Input Parameters: 3412 + comm - MPI communicator 3413 . 
A - "diagonal" portion of matrix 3414 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3415 - garray - global index of B columns 3416 3417 Output Parameter: 3418 . mat - the matrix, with input A as its local diagonal matrix 3419 Level: advanced 3420 3421 Notes: 3422 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3423 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3424 3425 .seealso: MatCreateMPIAIJWithSplitArrays() 3426 @*/ 3427 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3428 { 3429 PetscErrorCode ierr; 3430 Mat_MPIAIJ *maij; 3431 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3432 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3433 const PetscScalar *oa; 3434 Mat Bnew; 3435 PetscInt m,n,N; 3436 3437 PetscFunctionBegin; 3438 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3439 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3440 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3441 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3442 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3443 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3444 3445 /* Get global columns of mat */ 3446 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3447 3448 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3449 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3450 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3451 maij = (Mat_MPIAIJ*)(*mat)->data; 3452 3453 (*mat)->preallocated = PETSC_TRUE; 3454 3455 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3456 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3457 3458 /* Set A as diagonal portion of *mat */ 3459 maij->A = A; 3460 3461 nz = oi[m]; 3462 for (i=0; i<nz; i++) { 3463 col = oj[i]; 3464 oj[i] = garray[col]; 3465 } 3466 3467 /* Set Bnew as off-diagonal portion of *mat */ 3468 ierr = MatSeqAIJGetArrayRead(B,&oa);CHKERRQ(ierr); 3469 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew);CHKERRQ(ierr); 3470 ierr = MatSeqAIJRestoreArrayRead(B,&oa);CHKERRQ(ierr); 3471 bnew = (Mat_SeqAIJ*)Bnew->data; 3472 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3473 maij->B = Bnew; 3474 3475 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3476 3477 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3478 b->free_a = PETSC_FALSE; 3479 b->free_ij = PETSC_FALSE; 3480 ierr = MatDestroy(&B);CHKERRQ(ierr); 3481 3482 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3483 bnew->free_a = PETSC_TRUE; 3484 bnew->free_ij = PETSC_TRUE; 3485 3486 /* condense columns of maij->B */ 3487 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3488 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3489 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3490 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3491 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3492 PetscFunctionReturn(0); 3493 } 3494 3495 
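/*
   A minimal usage sketch for MatCreateMPIAIJWithSeqAIJ(); the local sizes, preallocation
   arguments, and garray contents are hypothetical placeholders and error checking is omitted.
   Each rank builds its sequential "diagonal" block Ad and "off-diagonal" block Ao (whose column
   indices are local to Ao), plus the map garray from local columns of Ao to global columns:

      Mat      Ad,Ao,C;
      PetscInt garray[] = {...};                               global column for each column of Ao
      MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,0,dnnz,&Ad);         fill with MatSetValues(), then assemble
      MatCreateSeqAIJ(PETSC_COMM_SELF,m,nghost,0,onnz,&Ao);    fill with MatSetValues(), then assemble
      MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Ad,Ao,garray,&C);

   After the call Ad is owned by C and Ao has been destroyed, so the caller must not use or
   destroy either of them afterwards.
*/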
extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3496 3497 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3498 { 3499 PetscErrorCode ierr; 3500 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3501 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3502 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3503 Mat M,Msub,B=a->B; 3504 MatScalar *aa; 3505 Mat_SeqAIJ *aij; 3506 PetscInt *garray = a->garray,*colsub,Ncols; 3507 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3508 IS iscol_sub,iscmap; 3509 const PetscInt *is_idx,*cmap; 3510 PetscBool allcolumns=PETSC_FALSE; 3511 MPI_Comm comm; 3512 3513 PetscFunctionBegin; 3514 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3515 if (call == MAT_REUSE_MATRIX) { 3516 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3517 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3518 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3519 3520 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3521 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3522 3523 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3524 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3525 3526 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3527 3528 } else { /* call == MAT_INITIAL_MATRIX) */ 3529 PetscBool flg; 3530 3531 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3532 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3533 3534 /* (1) iscol -> nonscalable iscol_local */ 3535 /* Check for special case: each processor gets entire matrix columns */ 3536 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3537 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3538 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3539 if (allcolumns) { 3540 iscol_sub = iscol_local; 3541 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3542 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3543 3544 } else { 3545 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3546 PetscInt *idx,*cmap1,k; 3547 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3548 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3549 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3550 count = 0; 3551 k = 0; 3552 for (i=0; i<Ncols; i++) { 3553 j = is_idx[i]; 3554 if (j >= cstart && j < cend) { 3555 /* diagonal part of mat */ 3556 idx[count] = j; 3557 cmap1[count++] = i; /* column index in submat */ 3558 } else if (Bn) { 3559 /* off-diagonal part of mat */ 3560 if (j == garray[k]) { 3561 idx[count] = j; 3562 cmap1[count++] = i; /* column index in submat */ 3563 } else if (j > garray[k]) { 3564 while (j > garray[k] && k < Bn-1) k++; 3565 if (j == garray[k]) { 3566 idx[count] = j; 3567 cmap1[count++] = i; /* column index in submat */ 3568 } 3569 } 3570 } 3571 } 3572 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3573 3574 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3575 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3576 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3577 3578 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3579 } 3580 3581 /* (3) Create sequential Msub */ 3582 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3583 } 3584 3585 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3586 aij = (Mat_SeqAIJ*)(Msub)->data; 3587 ii = aij->i; 3588 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3589 3590 /* 3591 m - number of local rows 3592 Ncols - number of columns (same on all processors) 3593 rstart - first row in new global matrix generated 3594 */ 3595 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3596 3597 if (call == MAT_INITIAL_MATRIX) { 3598 /* (4) Create parallel newmat */ 3599 PetscMPIInt rank,size; 3600 PetscInt csize; 3601 3602 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 3603 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 3604 3605 /* 3606 Determine the number of non-zeros in the diagonal and off-diagonal 3607 portions of the matrix in order to do correct preallocation 3608 */ 3609 3610 /* first get start and end of "diagonal" columns */ 3611 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3612 if (csize == PETSC_DECIDE) { 3613 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3614 if (mglobal == Ncols) { /* square matrix */ 3615 nlocal = m; 3616 } else { 3617 nlocal = Ncols/size + ((Ncols % size) > rank); 3618 } 3619 } else { 3620 nlocal = csize; 3621 } 3622 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3623 rstart = rend - nlocal; 3624 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3625 3626 /* next, compute all the lengths */ 3627 jj = aij->j; 3628 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3629 olens = dlens + m; 3630 for (i=0; i<m; i++) { 3631 jend = ii[i+1] - ii[i]; 3632 olen = 0; 3633 dlen = 0; 3634 for (j=0; j<jend; j++) { 3635 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3636 else dlen++; 3637 jj++; 3638 } 3639 olens[i] = olen; 3640 dlens[i] = dlen; 3641 } 3642 3643 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3644 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3645 3646 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3647 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
3648 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3649 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3650 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3651 ierr = PetscFree(dlens);CHKERRQ(ierr); 3652 3653 } else { /* call == MAT_REUSE_MATRIX */ 3654 M = *newmat; 3655 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3656 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3657 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3658 /* 3659 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3660 rather than the slower MatSetValues(). 3661 */ 3662 M->was_assembled = PETSC_TRUE; 3663 M->assembled = PETSC_FALSE; 3664 } 3665 3666 /* (5) Set values of Msub to *newmat */ 3667 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3668 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3669 3670 jj = aij->j; 3671 ierr = MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr); 3672 for (i=0; i<m; i++) { 3673 row = rstart + i; 3674 nz = ii[i+1] - ii[i]; 3675 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3676 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3677 jj += nz; aa += nz; 3678 } 3679 ierr = MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr); 3680 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3681 3682 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3683 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3684 3685 ierr = PetscFree(colsub);CHKERRQ(ierr); 3686 3687 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3688 if (call == MAT_INITIAL_MATRIX) { 3689 *newmat = M; 3690 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3691 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3692 3693 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3694 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3695 3696 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3697 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3698 3699 if (iscol_local) { 3700 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3701 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3702 } 3703 } 3704 PetscFunctionReturn(0); 3705 } 3706 3707 /* 3708 Not great since it makes two copies of the submatrix, first an SeqAIJ 3709 in local and then by concatenating the local matrices the end result. 3710 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3711 3712 Note: This requires a sequential iscol with all indices. 
3713 */ 3714 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3715 { 3716 PetscErrorCode ierr; 3717 PetscMPIInt rank,size; 3718 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3719 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3720 Mat M,Mreuse; 3721 MatScalar *aa,*vwork; 3722 MPI_Comm comm; 3723 Mat_SeqAIJ *aij; 3724 PetscBool colflag,allcolumns=PETSC_FALSE; 3725 3726 PetscFunctionBegin; 3727 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3728 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 3729 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 3730 3731 /* Check for special case: each processor gets entire matrix columns */ 3732 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3733 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3734 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3735 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3736 3737 if (call == MAT_REUSE_MATRIX) { 3738 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3739 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3740 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3741 } else { 3742 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3743 } 3744 3745 /* 3746 m - number of local rows 3747 n - number of columns (same on all processors) 3748 rstart - first row in new global matrix generated 3749 */ 3750 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3751 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3752 if (call == MAT_INITIAL_MATRIX) { 3753 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3754 ii = aij->i; 3755 jj = aij->j; 3756 3757 /* 3758 Determine the number of non-zeros in the diagonal and off-diagonal 3759 portions of the matrix in order to do correct preallocation 3760 */ 3761 3762 /* first get start and end of "diagonal" columns */ 3763 if (csize == PETSC_DECIDE) { 3764 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3765 if (mglobal == n) { /* square matrix */ 3766 nlocal = m; 3767 } else { 3768 nlocal = n/size + ((n % size) > rank); 3769 } 3770 } else { 3771 nlocal = csize; 3772 } 3773 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3774 rstart = rend - nlocal; 3775 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3776 3777 /* next, compute all the lengths */ 3778 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3779 olens = dlens + m; 3780 for (i=0; i<m; i++) { 3781 jend = ii[i+1] - ii[i]; 3782 olen = 0; 3783 dlen = 0; 3784 for (j=0; j<jend; j++) { 3785 if (*jj < rstart || *jj >= rend) olen++; 3786 else dlen++; 3787 jj++; 3788 } 3789 olens[i] = olen; 3790 dlens[i] = dlen; 3791 } 3792 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3793 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3794 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3795 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3796 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3797 ierr = PetscFree(dlens);CHKERRQ(ierr); 3798 } else { 3799 PetscInt ml,nl; 3800 3801 M = *newmat; 3802 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3803 if 
(ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3804 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3805 /* 3806 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3807 rather than the slower MatSetValues(). 3808 */ 3809 M->was_assembled = PETSC_TRUE; 3810 M->assembled = PETSC_FALSE; 3811 } 3812 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3813 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3814 ii = aij->i; 3815 jj = aij->j; 3816 3817 /* trigger copy to CPU if needed */ 3818 ierr = MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr); 3819 for (i=0; i<m; i++) { 3820 row = rstart + i; 3821 nz = ii[i+1] - ii[i]; 3822 cwork = jj; jj += nz; 3823 vwork = aa; aa += nz; 3824 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3825 } 3826 ierr = MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr); 3827 3828 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3829 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3830 *newmat = M; 3831 3832 /* save submatrix used in processor for next request */ 3833 if (call == MAT_INITIAL_MATRIX) { 3834 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3835 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3836 } 3837 PetscFunctionReturn(0); 3838 } 3839 3840 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3841 { 3842 PetscInt m,cstart, cend,j,nnz,i,d; 3843 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3844 const PetscInt *JJ; 3845 PetscErrorCode ierr; 3846 PetscBool nooffprocentries; 3847 3848 PetscFunctionBegin; 3849 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3850 3851 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3852 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3853 m = B->rmap->n; 3854 cstart = B->cmap->rstart; 3855 cend = B->cmap->rend; 3856 rstart = B->rmap->rstart; 3857 3858 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3859 3860 if (PetscDefined(USE_DEBUG)) { 3861 for (i=0; i<m; i++) { 3862 nnz = Ii[i+1]- Ii[i]; 3863 JJ = J + Ii[i]; 3864 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3865 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3866 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3867 } 3868 } 3869 3870 for (i=0; i<m; i++) { 3871 nnz = Ii[i+1]- Ii[i]; 3872 JJ = J + Ii[i]; 3873 nnz_max = PetscMax(nnz_max,nnz); 3874 d = 0; 3875 for (j=0; j<nnz; j++) { 3876 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3877 } 3878 d_nnz[i] = d; 3879 o_nnz[i] = nnz - d; 3880 } 3881 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3882 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3883 3884 for (i=0; i<m; i++) { 3885 ii = i + rstart; 3886 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? 
v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 3887 } 3888 nooffprocentries = B->nooffprocentries; 3889 B->nooffprocentries = PETSC_TRUE; 3890 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3891 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3892 B->nooffprocentries = nooffprocentries; 3893 3894 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3895 PetscFunctionReturn(0); 3896 } 3897 3898 /*@ 3899 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3900 (the default parallel PETSc format). 3901 3902 Collective 3903 3904 Input Parameters: 3905 + B - the matrix 3906 . i - the indices into j for the start of each local row (starts with zero) 3907 . j - the column indices for each local row (starts with zero) 3908 - v - optional values in the matrix 3909 3910 Level: developer 3911 3912 Notes: 3913 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3914 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3915 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3916 3917 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3918 3919 The format which is used for the sparse matrix input, is equivalent to a 3920 row-major ordering.. i.e for the following matrix, the input data expected is 3921 as shown 3922 3923 $ 1 0 0 3924 $ 2 0 3 P0 3925 $ ------- 3926 $ 4 5 6 P1 3927 $ 3928 $ Process0 [P0]: rows_owned=[0,1] 3929 $ i = {0,1,3} [size = nrow+1 = 2+1] 3930 $ j = {0,0,2} [size = 3] 3931 $ v = {1,2,3} [size = 3] 3932 $ 3933 $ Process1 [P1]: rows_owned=[2] 3934 $ i = {0,3} [size = nrow+1 = 1+1] 3935 $ j = {0,1,2} [size = 3] 3936 $ v = {4,5,6} [size = 3] 3937 3938 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3939 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3940 @*/ 3941 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3942 { 3943 PetscErrorCode ierr; 3944 3945 PetscFunctionBegin; 3946 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3947 PetscFunctionReturn(0); 3948 } 3949 3950 /*@C 3951 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3952 (the default parallel PETSc format). For good matrix assembly performance 3953 the user should preallocate the matrix storage by setting the parameters 3954 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3955 performance can be increased by more than a factor of 50. 3956 3957 Collective 3958 3959 Input Parameters: 3960 + B - the matrix 3961 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3962 (same value is used for all local rows) 3963 . d_nnz - array containing the number of nonzeros in the various rows of the 3964 DIAGONAL portion of the local submatrix (possibly different for each row) 3965 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3966 The size of this array is equal to the number of local rows, i.e 'm'. 3967 For matrices that will be factored, you must leave room for (and set) 3968 the diagonal entry even if it is zero. 3969 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3970 submatrix (same value is used for all local rows). 3971 - o_nnz - array containing the number of nonzeros in the various rows of the 3972 OFF-DIAGONAL portion of the local submatrix (possibly different for 3973 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3974 structure. The size of this array is equal to the number 3975 of local rows, i.e., 'm'. 3976 3977 If the *_nnz parameter is given then the *_nz parameter is ignored. 3978 3979 The AIJ format (also called the Yale sparse matrix format or 3980 compressed row storage (CSR)) is fully compatible with standard Fortran 77 3981 storage. The stored row and column indices begin with zero. 3982 See Users-Manual: ch_mat for details. 3983 3984 The parallel matrix is partitioned such that the first m0 rows belong to 3985 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3986 to process 2 etc., where m0,m1,m2... are the input parameter 'm'. 3987 3988 The DIAGONAL portion of the local submatrix of a processor can be defined 3989 as the submatrix obtained by extracting the part corresponding to 3990 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3991 first row that belongs to the processor, r2 is the last row belonging to 3992 this processor, and c1-c2 is the range of indices of the local part of a 3993 vector suitable for applying the matrix to. This is an mxn matrix. In the 3994 common case of a square matrix, the row and column ranges are the same and 3995 the DIAGONAL part is also square. The remaining portion of the local 3996 submatrix (mxN) constitutes the OFF-DIAGONAL portion. 3997 3998 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 3999 4000 You can call MatGetInfo() to get information on how effective the preallocation was; 4001 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4002 You can also run with the option -info and look for messages with the string 4003 malloc in them to see if additional memory allocation was needed. 4004 4005 Example usage: 4006 4007 Consider the following 8x8 matrix with 34 non-zero values that is 4008 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4009 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4010 as follows: 4011 4012 .vb 4013 1 2 0 | 0 3 0 | 0 4 4014 Proc0 0 5 6 | 7 0 0 | 8 0 4015 9 0 10 | 11 0 0 | 12 0 4016 ------------------------------------- 4017 13 0 14 | 15 16 17 | 0 0 4018 Proc1 0 18 0 | 19 20 21 | 0 0 4019 0 0 0 | 22 23 0 | 24 0 4020 ------------------------------------- 4021 Proc2 25 26 27 | 0 0 28 | 29 0 4022 30 0 0 | 31 32 33 | 0 34 4023 .ve 4024 4025 This can be represented as a collection of submatrices as: 4026 4027 .vb 4028 A B C 4029 D E F 4030 G H I 4031 .ve 4032 4033 Where the submatrices A,B,C are owned by proc0, D,E,F are 4034 owned by proc1, G,H,I are owned by proc2. 4035 4036 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4037 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4038 The 'M','N' parameters are 8,8, and have the same values on all procs. 4039 4040 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4041 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4042 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4043 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4044 part as SeqAIJ matrices. 
For example, proc1 will store [E] as a SeqAIJ 4045 matrix, and [DF] as another SeqAIJ matrix. 4046 4047 When d_nz, o_nz parameters are specified, d_nz storage elements are 4048 allocated for every row of the local diagonal submatrix, and o_nz 4049 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4050 One way to choose d_nz and o_nz is to use the max nonzeros per local 4051 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4052 In this case, the values of d_nz,o_nz are: 4053 .vb 4054 proc0 : dnz = 2, o_nz = 2 4055 proc1 : dnz = 3, o_nz = 2 4056 proc2 : dnz = 1, o_nz = 4 4057 .ve 4058 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4059 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4060 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4061 34 values. 4062 4063 When d_nnz, o_nnz parameters are specified, the storage is specified 4064 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4065 In the above case the values for d_nnz,o_nnz are: 4066 .vb 4067 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4068 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4069 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4070 .ve 4071 Here the space allocated is the sum of all the above values, i.e., 34, and 4072 hence pre-allocation is perfect. 4073 4074 Level: intermediate 4075 4076 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4077 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 4078 @*/ 4079 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4080 { 4081 PetscErrorCode ierr; 4082 4083 PetscFunctionBegin; 4084 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4085 PetscValidType(B,1); 4086 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4087 PetscFunctionReturn(0); 4088 } 4089 4090 /*@ 4091 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard 4092 CSR format. 4093 4094 Collective 4095 4096 Input Parameters: 4097 + comm - MPI communicator 4098 . m - number of local rows (Cannot be PETSC_DECIDE) 4099 . n - This value should be the same as the local size used in creating the 4100 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4101 calculated if N is given) For square matrices n is almost always m. 4102 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4103 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4104 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4105 . j - column indices 4106 - a - matrix values 4107 4108 Output Parameter: 4109 . mat - the matrix 4110 4111 Level: intermediate 4112 4113 Notes: 4114 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4115 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4116 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4117 4118 The i and j indices are 0 based, and the i indices are offsets into the local j array. 4119 4120 The format used for the sparse matrix input is equivalent to a 4121 row-major ordering, 
i.e., for the following matrix, the input data expected is 4122 as shown below. 4123 4124 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays(). 4125 4126 $ 1 0 0 4127 $ 2 0 3 P0 4128 $ ------- 4129 $ 4 5 6 P1 4130 $ 4131 $ Process0 [P0]: rows_owned=[0,1] 4132 $ i = {0,1,3} [size = nrow+1 = 2+1] 4133 $ j = {0,0,2} [size = 3] 4134 $ v = {1,2,3} [size = 3] 4135 $ 4136 $ Process1 [P1]: rows_owned=[2] 4137 $ i = {0,3} [size = nrow+1 = 1+1] 4138 $ j = {0,1,2} [size = 3] 4139 $ v = {4,5,6} [size = 3] 4140 4141 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4142 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4143 @*/ 4144 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4145 { 4146 PetscErrorCode ierr; 4147 4148 PetscFunctionBegin; 4149 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4150 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4151 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4152 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4153 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4154 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4155 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4156 PetscFunctionReturn(0); 4157 } 4158 4159 /*@ 4160 MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows in standard 4161 CSR format. Only the numerical values are updated; the other arrays must be identical to those used to create the matrix 4162 4163 Collective 4164 4165 Input Parameters: 4166 + mat - the matrix 4167 . m - number of local rows (Cannot be PETSC_DECIDE) 4168 . n - This value should be the same as the local size used in creating the 4169 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4170 calculated if N is given) For square matrices n is almost always m. 4171 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4172 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4173 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4174 . 
J - column indices 4175 - v - matrix values 4176 4177 Level: intermediate 4178 4179 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4180 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4181 @*/ 4182 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4183 { 4184 PetscErrorCode ierr; 4185 PetscInt cstart,nnz,i,j; 4186 PetscInt *ld; 4187 PetscBool nooffprocentries; 4188 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4189 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data; 4190 PetscScalar *ad = Ad->a, *ao = Ao->a; 4191 const PetscInt *Adi = Ad->i; 4192 PetscInt ldi,Iii,md; 4193 4194 PetscFunctionBegin; 4195 if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4196 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4197 if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4198 if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4199 4200 cstart = mat->cmap->rstart; 4201 if (!Aij->ld) { 4202 /* count number of entries below block diagonal */ 4203 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 4204 Aij->ld = ld; 4205 for (i=0; i<m; i++) { 4206 nnz = Ii[i+1]- Ii[i]; 4207 j = 0; 4208 while (J[j] < cstart && j < nnz) {j++;} 4209 J += nnz; 4210 ld[i] = j; 4211 } 4212 } else { 4213 ld = Aij->ld; 4214 } 4215 4216 for (i=0; i<m; i++) { 4217 nnz = Ii[i+1]- Ii[i]; 4218 Iii = Ii[i]; 4219 ldi = ld[i]; 4220 md = Adi[i+1]-Adi[i]; 4221 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr); 4222 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr); 4223 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr); 4224 ad += md; 4225 ao += nnz - md; 4226 } 4227 nooffprocentries = mat->nooffprocentries; 4228 mat->nooffprocentries = PETSC_TRUE; 4229 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr); 4230 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr); 4231 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 4232 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4233 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4234 mat->nooffprocentries = nooffprocentries; 4235 PetscFunctionReturn(0); 4236 } 4237 4238 /*@C 4239 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4240 (the default parallel PETSc format). For good matrix assembly performance 4241 the user should preallocate the matrix storage by setting the parameters 4242 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4243 performance can be increased by more than a factor of 50. 4244 4245 Collective 4246 4247 Input Parameters: 4248 + comm - MPI communicator 4249 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4250 This value should be the same as the local size used in creating the 4251 y vector for the matrix-vector product y = Ax. 4252 . n - This value should be the same as the local size used in creating the 4253 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4254 calculated if N is given) For square matrices n is almost always m. 4255 . 
M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4256 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4257 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4258 (same value is used for all local rows) 4259 . d_nnz - array containing the number of nonzeros in the various rows of the 4260 DIAGONAL portion of the local submatrix (possibly different for each row) 4261 or NULL, if d_nz is used to specify the nonzero structure. 4262 The size of this array is equal to the number of local rows, i.e., 'm'. 4263 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4264 submatrix (same value is used for all local rows). 4265 - o_nnz - array containing the number of nonzeros in the various rows of the 4266 OFF-DIAGONAL portion of the local submatrix (possibly different for 4267 each row) or NULL, if o_nz is used to specify the nonzero 4268 structure. The size of this array is equal to the number 4269 of local rows, i.e., 'm'. 4270 4271 Output Parameter: 4272 . A - the matrix 4273 4274 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4275 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4276 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()] 4277 4278 Notes: 4279 If the *_nnz parameter is given then the *_nz parameter is ignored. 4280 4281 The m,n,M,N parameters specify the size of the matrix, and its partitioning across 4282 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4283 storage requirements for this matrix. 4284 4285 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4286 processor then it must be used on all processors that share the object for 4287 that argument. 4288 4289 The user MUST specify either the local or global matrix dimensions 4290 (possibly both). 4291 4292 The parallel matrix is partitioned across processors such that the 4293 first m0 rows belong to process 0, the next m1 rows belong to 4294 process 1, the next m2 rows belong to process 2 etc., where 4295 m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores 4296 values corresponding to an [m x N] submatrix. 4297 4298 The columns are logically partitioned with the n0 columns belonging 4299 to the 0th partition, the next n1 columns belonging to the next 4300 partition etc., where n0,n1,n2... are the input parameter 'n'. 4301 4302 The DIAGONAL portion of the local submatrix on any given processor 4303 is the submatrix corresponding to the rows and columns m,n 4304 owned by that processor, i.e., the diagonal matrix on 4305 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1] 4306 etc. The remaining portion of the local submatrix [m x (N-n)] 4307 constitutes the OFF-DIAGONAL portion. The example below better 4308 illustrates this concept. 4309 4310 For a square global matrix we define each processor's diagonal portion 4311 to be its local rows and the corresponding columns (a square submatrix); 4312 each processor's off-diagonal portion encompasses the remainder of the 4313 local matrix (a rectangular submatrix). 4314 4315 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 4316 4317 When calling this routine with a single process communicator, a matrix of 4318 type SEQAIJ is returned. 
If a matrix of type MPIAIJ is desired for this 4319 type of communicator, use the construction mechanism 4320 .vb 4321 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4322 .ve 4323 4329 By default, this format uses inodes (identical nodes) when possible. 4330 We search for consecutive rows with the same nonzero structure, thereby 4331 reusing matrix information to achieve increased efficiency. 4332 4333 Options Database Keys: 4334 + -mat_no_inode - Do not use inodes 4335 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4336 4337 4338 4339 Example usage: 4340 4341 Consider the following 8x8 matrix with 34 nonzero values that is 4342 assembled across 3 processes. Let's assume that proc0 owns 3 rows, 4343 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 4344 as follows 4345 4346 .vb 4347 1 2 0 | 0 3 0 | 0 4 4348 Proc0 0 5 6 | 7 0 0 | 8 0 4349 9 0 10 | 11 0 0 | 12 0 4350 ------------------------------------- 4351 13 0 14 | 15 16 17 | 0 0 4352 Proc1 0 18 0 | 19 20 21 | 0 0 4353 0 0 0 | 22 23 0 | 24 0 4354 ------------------------------------- 4355 Proc2 25 26 27 | 0 0 28 | 29 0 4356 30 0 0 | 31 32 33 | 0 34 4357 .ve 4358 4359 This can be represented as a collection of submatrices as 4360 4361 .vb 4362 A B C 4363 D E F 4364 G H I 4365 .ve 4366 4367 where the submatrices A,B,C are owned by proc0, D,E,F are 4368 owned by proc1, and G,H,I are owned by proc2. 4369 4370 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4371 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4372 The 'M','N' parameters are 8,8, and have the same values on all procs. 4373 4374 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4375 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4376 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4377 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 4378 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ 4379 matrix and [DF] as another SeqAIJ matrix. 4380 4381 When the d_nz, o_nz parameters are specified, d_nz storage elements are 4382 allocated for every row of the local diagonal submatrix, and o_nz 4383 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4384 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 4385 row in each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4386 In this case, the values of d_nz,o_nz are 4387 .vb 4388 proc0 : d_nz = 2, o_nz = 2 4389 proc1 : d_nz = 3, o_nz = 2 4390 proc2 : d_nz = 1, o_nz = 4 4391 .ve 4392 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4393 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 4394 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4395 34 values. 4396 4397 When the d_nnz, o_nnz parameters are specified, the storage is specified 4398 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 4399 In the above case the values for d_nnz,o_nnz are 4400 .vb 4401 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4402 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4403 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4404 .ve 4405 Here the space allocated is the sum of all the above values, i.e. 34, and 4406 hence the preallocation is perfect.
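   A minimal calling sketch for the example above (illustrative only; it assumes the three-process ownership shown, uses the per-row counts from the d_nnz/o_nnz table for process 0, and omits error checking and the MatSetValues() loop that would insert the entries):
.vb
     Mat      A;
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};   /* process 0 values from the table above */

     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
     /* ... insert the nonzeros of the locally owned rows with MatSetValues(), using global indices ... */
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
     /* ... use A, then MatDestroy(&A) ... */
.ve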
4407 4408 Level: intermediate 4409 4410 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4411 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4412 @*/ 4413 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4414 { 4415 PetscErrorCode ierr; 4416 PetscMPIInt size; 4417 4418 PetscFunctionBegin; 4419 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4420 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4421 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4422 if (size > 1) { 4423 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4424 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4425 } else { 4426 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4427 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4428 } 4429 PetscFunctionReturn(0); 4430 } 4431 4432 /*@C 4433 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4434 4435 Not Collective 4436 4437 Input Parameter: 4438 . A - The MPIAIJ matrix 4439 4440 Output Parameters: 4441 + Ad - The local diagonal block as a SeqAIJ matrix 4442 . Ao - The local off-diagonal block as a SeqAIJ matrix 4443 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4444 4445 Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4446 in Ad are in [0, Nc), where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is 4447 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4448 local column numbers to global column numbers in the original matrix.
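   A minimal sketch of how the three pieces can be used together (illustrative only; it assumes A is an assembled MATMPIAIJ matrix and omits error checking):
.vb
     Mat            Ad,Ao;
     const PetscInt *colmap,*cols;
     PetscInt       ncols,j;

     MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
     MatGetRow(Ao,0,&ncols,&cols,NULL);        /* local row 0 of the off-diagonal block */
     for (j=0; j<ncols; j++) {
       PetscInt gcol = colmap[cols[j]];        /* global column of A for this entry */
       (void)gcol;
     }
     MatRestoreRow(Ao,0,&ncols,&cols,NULL);
.ve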
4449 4450 Level: intermediate 4451 4452 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ 4453 @*/ 4454 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4455 { 4456 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4457 PetscBool flg; 4458 PetscErrorCode ierr; 4459 4460 PetscFunctionBegin; 4461 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4462 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4463 if (Ad) *Ad = a->A; 4464 if (Ao) *Ao = a->B; 4465 if (colmap) *colmap = a->garray; 4466 PetscFunctionReturn(0); 4467 } 4468 4469 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4470 { 4471 PetscErrorCode ierr; 4472 PetscInt m,N,i,rstart,nnz,Ii; 4473 PetscInt *indx; 4474 PetscScalar *values; 4475 4476 PetscFunctionBegin; 4477 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4478 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4479 PetscInt *dnz,*onz,sum,bs,cbs; 4480 4481 if (n == PETSC_DECIDE) { 4482 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4483 } 4484 /* Check sum(n) = N */ 4485 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4486 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4487 4488 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4489 rstart -= m; 4490 4491 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4492 for (i=0; i<m; i++) { 4493 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4494 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4495 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4496 } 4497 4498 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4499 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4500 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4501 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4502 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4503 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4504 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4505 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4506 } 4507 4508 /* numeric phase */ 4509 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4510 for (i=0; i<m; i++) { 4511 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4512 Ii = i + rstart; 4513 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4514 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4515 } 4516 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4517 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4518 PetscFunctionReturn(0); 4519 } 4520 4521 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4522 { 4523 PetscErrorCode ierr; 4524 PetscMPIInt rank; 4525 PetscInt m,N,i,rstart,nnz; 4526 size_t len; 4527 const PetscInt *indx; 4528 PetscViewer out; 4529 char *name; 4530 Mat B; 4531 const PetscScalar *values; 4532 4533 PetscFunctionBegin; 4534 ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr); 4535 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 4536 /* Should this be the type of the diagonal block of A? 
*/ 4537 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4538 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4539 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4540 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4541 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4542 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 4543 for (i=0; i<m; i++) { 4544 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4545 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4546 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4547 } 4548 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4549 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4550 4551 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr); 4552 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4553 ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr); 4554 ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr); 4555 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4556 ierr = PetscFree(name);CHKERRQ(ierr); 4557 ierr = MatView(B,out);CHKERRQ(ierr); 4558 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4559 ierr = MatDestroy(&B);CHKERRQ(ierr); 4560 PetscFunctionReturn(0); 4561 } 4562 4563 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4564 { 4565 PetscErrorCode ierr; 4566 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4567 4568 PetscFunctionBegin; 4569 if (!merge) PetscFunctionReturn(0); 4570 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4571 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4572 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4573 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4574 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4575 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4576 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4577 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4578 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4579 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4580 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4581 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4582 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4583 ierr = PetscFree(merge);CHKERRQ(ierr); 4584 PetscFunctionReturn(0); 4585 } 4586 4587 #include <../src/mat/utils/freespace.h> 4588 #include <petscbt.h> 4589 4590 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4591 { 4592 PetscErrorCode ierr; 4593 MPI_Comm comm; 4594 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4595 PetscMPIInt size,rank,taga,*len_s; 4596 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4597 PetscInt proc,m; 4598 PetscInt **buf_ri,**buf_rj; 4599 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4600 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4601 MPI_Request *s_waits,*r_waits; 4602 MPI_Status *status; 4603 MatScalar *aa=a->a; 4604 MatScalar **abuf_r,*ba_i; 4605 Mat_Merge_SeqsToMPI *merge; 4606 PetscContainer container; 4607 4608 PetscFunctionBegin; 4609 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4610 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4611 4612 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4613 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 4614 4615 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4616 if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4617 ierr = 
PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4618 4619 bi = merge->bi; 4620 bj = merge->bj; 4621 buf_ri = merge->buf_ri; 4622 buf_rj = merge->buf_rj; 4623 4624 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4625 owners = merge->rowmap->range; 4626 len_s = merge->len_s; 4627 4628 /* send and recv matrix values */ 4629 /*-----------------------------*/ 4630 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4631 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4632 4633 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4634 for (proc=0,k=0; proc<size; proc++) { 4635 if (!len_s[proc]) continue; 4636 i = owners[proc]; 4637 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRMPI(ierr); 4638 k++; 4639 } 4640 4641 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRMPI(ierr);} 4642 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRMPI(ierr);} 4643 ierr = PetscFree(status);CHKERRQ(ierr); 4644 4645 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4646 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4647 4648 /* insert mat values of mpimat */ 4649 /*----------------------------*/ 4650 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4651 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4652 4653 for (k=0; k<merge->nrecv; k++) { 4654 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4655 nrows = *(buf_ri_k[k]); 4656 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4657 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4658 } 4659 4660 /* set values of ba */ 4661 m = merge->rowmap->n; 4662 for (i=0; i<m; i++) { 4663 arow = owners[rank] + i; 4664 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4665 bnzi = bi[i+1] - bi[i]; 4666 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4667 4668 /* add local non-zero vals of this proc's seqmat into ba */ 4669 anzi = ai[arow+1] - ai[arow]; 4670 aj = a->j + ai[arow]; 4671 aa = a->a + ai[arow]; 4672 nextaj = 0; 4673 for (j=0; nextaj<anzi; j++) { 4674 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4675 ba_i[j] += aa[nextaj++]; 4676 } 4677 } 4678 4679 /* add received vals into ba */ 4680 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4681 /* i-th row */ 4682 if (i == *nextrow[k]) { 4683 anzi = *(nextai[k]+1) - *nextai[k]; 4684 aj = buf_rj[k] + *(nextai[k]); 4685 aa = abuf_r[k] + *(nextai[k]); 4686 nextaj = 0; 4687 for (j=0; nextaj<anzi; j++) { 4688 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4689 ba_i[j] += aa[nextaj++]; 4690 } 4691 } 4692 nextrow[k]++; nextai[k]++; 4693 } 4694 } 4695 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4696 } 4697 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4698 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4699 4700 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4701 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4702 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4703 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4704 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4705 PetscFunctionReturn(0); 4706 } 4707 4708 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4709 { 4710 PetscErrorCode ierr; 4711 Mat B_mpi; 4712 Mat_SeqAIJ 
*a=(Mat_SeqAIJ*)seqmat->data; 4713 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4714 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4715 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4716 PetscInt len,proc,*dnz,*onz,bs,cbs; 4717 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4718 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4719 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4720 MPI_Status *status; 4721 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4722 PetscBT lnkbt; 4723 Mat_Merge_SeqsToMPI *merge; 4724 PetscContainer container; 4725 4726 PetscFunctionBegin; 4727 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4728 4729 /* make sure it is a PETSc comm */ 4730 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4731 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4732 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 4733 4734 ierr = PetscNew(&merge);CHKERRQ(ierr); 4735 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4736 4737 /* determine row ownership */ 4738 /*---------------------------------------------------------*/ 4739 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4740 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4741 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4742 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4743 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4744 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4745 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4746 4747 m = merge->rowmap->n; 4748 owners = merge->rowmap->range; 4749 4750 /* determine the number of messages to send, their lengths */ 4751 /*---------------------------------------------------------*/ 4752 len_s = merge->len_s; 4753 4754 len = 0; /* length of buf_si[] */ 4755 merge->nsend = 0; 4756 for (proc=0; proc<size; proc++) { 4757 len_si[proc] = 0; 4758 if (proc == rank) { 4759 len_s[proc] = 0; 4760 } else { 4761 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4762 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4763 } 4764 if (len_s[proc]) { 4765 merge->nsend++; 4766 nrows = 0; 4767 for (i=owners[proc]; i<owners[proc+1]; i++) { 4768 if (ai[i+1] > ai[i]) nrows++; 4769 } 4770 len_si[proc] = 2*(nrows+1); 4771 len += len_si[proc]; 4772 } 4773 } 4774 4775 /* determine the number and length of messages to receive for ij-structure */ 4776 /*-------------------------------------------------------------------------*/ 4777 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4778 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4779 4780 /* post the Irecv of j-structure */ 4781 /*-------------------------------*/ 4782 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4783 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4784 4785 /* post the Isend of j-structure */ 4786 /*--------------------------------*/ 4787 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4788 4789 for (proc=0, k=0; proc<size; proc++) { 4790 if (!len_s[proc]) continue; 4791 i = owners[proc]; 4792 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRMPI(ierr); 4793 k++; 4794 } 4795 4796 /* receives and sends of j-structure are complete */ 4797 
/*------------------------------------------------*/ 4798 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRMPI(ierr);} 4799 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRMPI(ierr);} 4800 4801 /* send and recv i-structure */ 4802 /*---------------------------*/ 4803 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4804 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4805 4806 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4807 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4808 for (proc=0,k=0; proc<size; proc++) { 4809 if (!len_s[proc]) continue; 4810 /* form outgoing message for i-structure: 4811 buf_si[0]: nrows to be sent 4812 [1:nrows]: row index (global) 4813 [nrows+1:2*nrows+1]: i-structure index 4814 */ 4815 /*-------------------------------------------*/ 4816 nrows = len_si[proc]/2 - 1; 4817 buf_si_i = buf_si + nrows+1; 4818 buf_si[0] = nrows; 4819 buf_si_i[0] = 0; 4820 nrows = 0; 4821 for (i=owners[proc]; i<owners[proc+1]; i++) { 4822 anzi = ai[i+1] - ai[i]; 4823 if (anzi) { 4824 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4825 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4826 nrows++; 4827 } 4828 } 4829 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRMPI(ierr); 4830 k++; 4831 buf_si += len_si[proc]; 4832 } 4833 4834 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRMPI(ierr);} 4835 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRMPI(ierr);} 4836 4837 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4838 for (i=0; i<merge->nrecv; i++) { 4839 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4840 } 4841 4842 ierr = PetscFree(len_si);CHKERRQ(ierr); 4843 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4844 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4845 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4846 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4847 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4848 ierr = PetscFree(status);CHKERRQ(ierr); 4849 4850 /* compute a local seq matrix in each processor */ 4851 /*----------------------------------------------*/ 4852 /* allocate bi array and free space for accumulating nonzero column info */ 4853 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4854 bi[0] = 0; 4855 4856 /* create and initialize a linked list */ 4857 nlnk = N+1; 4858 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4859 4860 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4861 len = ai[owners[rank+1]] - ai[owners[rank]]; 4862 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4863 4864 current_space = free_space; 4865 4866 /* determine symbolic info for each local row */ 4867 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4868 4869 for (k=0; k<merge->nrecv; k++) { 4870 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4871 nrows = *buf_ri_k[k]; 4872 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4873 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4874 } 4875 4876 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4877 len = 0; 4878 for (i=0; i<m; i++) { 4879 bnzi = 0; 4880 /* add local non-zero cols of this proc's 
seqmat into lnk */ 4881 arow = owners[rank] + i; 4882 anzi = ai[arow+1] - ai[arow]; 4883 aj = a->j + ai[arow]; 4884 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4885 bnzi += nlnk; 4886 /* add received col data into lnk */ 4887 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4888 if (i == *nextrow[k]) { /* i-th row */ 4889 anzi = *(nextai[k]+1) - *nextai[k]; 4890 aj = buf_rj[k] + *nextai[k]; 4891 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4892 bnzi += nlnk; 4893 nextrow[k]++; nextai[k]++; 4894 } 4895 } 4896 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4897 4898 /* if free space is not available, make more free space */ 4899 if (current_space->local_remaining<bnzi) { 4900 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr); 4901 nspacedouble++; 4902 } 4903 /* copy data into free space, then initialize lnk */ 4904 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4905 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4906 4907 current_space->array += bnzi; 4908 current_space->local_used += bnzi; 4909 current_space->local_remaining -= bnzi; 4910 4911 bi[i+1] = bi[i] + bnzi; 4912 } 4913 4914 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4915 4916 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4917 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4918 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4919 4920 /* create symbolic parallel matrix B_mpi */ 4921 /*---------------------------------------*/ 4922 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4923 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4924 if (n==PETSC_DECIDE) { 4925 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4926 } else { 4927 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4928 } 4929 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4930 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4931 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4932 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4933 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4934 4935 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4936 B_mpi->assembled = PETSC_FALSE; 4937 merge->bi = bi; 4938 merge->bj = bj; 4939 merge->buf_ri = buf_ri; 4940 merge->buf_rj = buf_rj; 4941 merge->coi = NULL; 4942 merge->coj = NULL; 4943 merge->owners_co = NULL; 4944 4945 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4946 4947 /* attach the supporting struct to B_mpi for reuse */ 4948 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4949 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4950 ierr = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr); 4951 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4952 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4953 *mpimat = B_mpi; 4954 4955 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4956 PetscFunctionReturn(0); 4957 } 4958 4959 /*@C 4960 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4961 matrices from each processor 4962 4963 Collective 4964 4965 Input Parameters: 4966 + comm - the communicator the parallel matrix will live on 4967 . seqmat - the input sequential matrices 4968 . 
m - number of local rows (or PETSC_DECIDE) 4969 . n - number of local columns (or PETSC_DECIDE) 4970 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4971 4972 Output Parameter: 4973 . mpimat - the parallel matrix generated 4974 4975 Level: advanced 4976 4977 Notes: 4978 The dimensions of the sequential matrix in each processor MUST be the same. 4979 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4980 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4981 @*/ 4982 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4983 { 4984 PetscErrorCode ierr; 4985 PetscMPIInt size; 4986 4987 PetscFunctionBegin; 4988 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4989 if (size == 1) { 4990 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4991 if (scall == MAT_INITIAL_MATRIX) { 4992 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4993 } else { 4994 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4995 } 4996 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4997 PetscFunctionReturn(0); 4998 } 4999 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5000 if (scall == MAT_INITIAL_MATRIX) { 5001 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5002 } 5003 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5004 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5005 PetscFunctionReturn(0); 5006 } 5007 5008 /*@ 5009 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5010 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5011 with MatGetSize() 5012 5013 Not Collective 5014 5015 Input Parameters: 5016 + A - the matrix 5017 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5018 5019 Output Parameter: 5020 . A_loc - the local sequential matrix generated 5021 5022 Level: developer 5023 5024 Notes: 5025 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 5026 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 5027 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 5028 modify the values of the returned A_loc. 
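   A typical calling sequence might look like the following (illustrative only; it assumes A is an assembled MATMPIAIJ matrix and omits error checking):
.vb
     Mat A_loc = NULL;

     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);  /* build the mlocal x N local matrix       */
     /* ... change the numerical values of A ... */
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);    /* refresh the values, reuse the structure */
     MatDestroy(&A_loc);
.ve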
5029 5030 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge() 5031 @*/ 5032 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5033 { 5034 PetscErrorCode ierr; 5035 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5036 Mat_SeqAIJ *mat,*a,*b; 5037 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5038 const PetscScalar *aa,*ba,*aav,*bav; 5039 PetscScalar *ca,*cam; 5040 PetscMPIInt size; 5041 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5042 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5043 PetscBool match; 5044 5045 PetscFunctionBegin; 5046 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5047 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5048 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 5049 if (size == 1) { 5050 if (scall == MAT_INITIAL_MATRIX) { 5051 ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr); 5052 *A_loc = mpimat->A; 5053 } else if (scall == MAT_REUSE_MATRIX) { 5054 ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5055 } 5056 PetscFunctionReturn(0); 5057 } 5058 5059 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5060 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5061 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5062 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5063 ierr = MatSeqAIJGetArrayRead(mpimat->A,&aav);CHKERRQ(ierr); 5064 ierr = MatSeqAIJGetArrayRead(mpimat->B,&bav);CHKERRQ(ierr); 5065 aa = aav; 5066 ba = bav; 5067 if (scall == MAT_INITIAL_MATRIX) { 5068 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5069 ci[0] = 0; 5070 for (i=0; i<am; i++) { 5071 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5072 } 5073 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5074 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5075 k = 0; 5076 for (i=0; i<am; i++) { 5077 ncols_o = bi[i+1] - bi[i]; 5078 ncols_d = ai[i+1] - ai[i]; 5079 /* off-diagonal portion of A */ 5080 for (jo=0; jo<ncols_o; jo++) { 5081 col = cmap[*bj]; 5082 if (col >= cstart) break; 5083 cj[k] = col; bj++; 5084 ca[k++] = *ba++; 5085 } 5086 /* diagonal portion of A */ 5087 for (j=0; j<ncols_d; j++) { 5088 cj[k] = cstart + *aj++; 5089 ca[k++] = *aa++; 5090 } 5091 /* off-diagonal portion of A */ 5092 for (j=jo; j<ncols_o; j++) { 5093 cj[k] = cmap[*bj++]; 5094 ca[k++] = *ba++; 5095 } 5096 } 5097 /* put together the new matrix */ 5098 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5099 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5100 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5101 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5102 mat->free_a = PETSC_TRUE; 5103 mat->free_ij = PETSC_TRUE; 5104 mat->nonew = 0; 5105 } else if (scall == MAT_REUSE_MATRIX) { 5106 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5107 #if defined(PETSC_HAVE_DEVICE) 5108 (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU; 5109 #endif 5110 ci = mat->i; cj = mat->j; cam = mat->a; 5111 for (i=0; i<am; i++) { 5112 /* off-diagonal portion of A */ 5113 ncols_o = bi[i+1] - bi[i]; 5114 for (jo=0; jo<ncols_o; jo++) { 5115 col = cmap[*bj]; 5116 if (col >= cstart) break; 5117 *cam++ = *ba++; bj++; 5118 } 5119 /* diagonal portion of A */ 5120 ncols_d = ai[i+1] - ai[i]; 5121 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5122 /* off-diagonal portion of A */ 5123 for (j=jo; j<ncols_o; j++) { 5124 *cam++ = *ba++; bj++; 5125 } 5126 } 5127 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5128 ierr = MatSeqAIJRestoreArrayRead(mpimat->A,&aav);CHKERRQ(ierr); 5129 ierr = MatSeqAIJRestoreArrayRead(mpimat->B,&bav);CHKERRQ(ierr); 5130 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5131 PetscFunctionReturn(0); 5132 } 5133 5134 /*@ 5135 MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5136 mlocal rows and n columns, where n is the sum of the numbers of columns of the diagonal and off-diagonal parts 5137 5138 Not Collective 5139 5140 Input Parameters: 5141 + A - the matrix 5142 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5143 5144 Output Parameters: 5145 + glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5146 - A_loc - the local sequential matrix generated 5147 5148 Level: developer 5149 5150 Notes: 5151 This is different from MatMPIAIJGetLocalMat() since the first columns of the returned matrix are those associated with the diagonal part, followed by those associated with the off-diagonal part (in its local ordering) 5152 5153 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed() 5154 5155 @*/ 5156 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc) 5157 { 5158 PetscErrorCode ierr; 5159 Mat Ao,Ad; 5160 const PetscInt *cmap; 5161 PetscMPIInt size; 5162 PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*); 5163 5164 PetscFunctionBegin; 5165 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap);CHKERRQ(ierr); 5166 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 5167 if (size == 1) { 5168 if (scall == MAT_INITIAL_MATRIX) { 5169 ierr = PetscObjectReference((PetscObject)Ad);CHKERRQ(ierr); 5170 *A_loc = Ad; 5171 } else if (scall == MAT_REUSE_MATRIX) { 5172 ierr = MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5173 } 5174 if (glob) { ierr = ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob);CHKERRQ(ierr); } 5175 PetscFunctionReturn(0); 5176 } 5177 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f);CHKERRQ(ierr); 5178 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5179 if (f) { 5180 ierr = (*f)(A,scall,glob,A_loc);CHKERRQ(ierr); 5181 } else { 5182 Mat_SeqAIJ *a = (Mat_SeqAIJ*)Ad->data; 5183 Mat_SeqAIJ *b = (Mat_SeqAIJ*)Ao->data; 5184 Mat_SeqAIJ *c; 5185 PetscInt *ai = a->i, *aj = a->j; 5186 PetscInt *bi = b->i, *bj = b->j; 5187 PetscInt *ci,*cj; 5188 const PetscScalar *aa,*ba; 5189 PetscScalar *ca; 5190 PetscInt i,j,am,dn,on; 5191 5192 ierr =
MatGetLocalSize(Ad,&am,&dn);CHKERRQ(ierr); 5193 ierr = MatGetLocalSize(Ao,NULL,&on);CHKERRQ(ierr); 5194 ierr = MatSeqAIJGetArrayRead(Ad,&aa);CHKERRQ(ierr); 5195 ierr = MatSeqAIJGetArrayRead(Ao,&ba);CHKERRQ(ierr); 5196 if (scall == MAT_INITIAL_MATRIX) { 5197 PetscInt k; 5198 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5199 ierr = PetscMalloc1(ai[am]+bi[am],&cj);CHKERRQ(ierr); 5200 ierr = PetscMalloc1(ai[am]+bi[am],&ca);CHKERRQ(ierr); 5201 ci[0] = 0; 5202 for (i=0,k=0; i<am; i++) { 5203 const PetscInt ncols_o = bi[i+1] - bi[i]; 5204 const PetscInt ncols_d = ai[i+1] - ai[i]; 5205 ci[i+1] = ci[i] + ncols_o + ncols_d; 5206 /* diagonal portion of A */ 5207 for (j=0; j<ncols_d; j++,k++) { 5208 cj[k] = *aj++; 5209 ca[k] = *aa++; 5210 } 5211 /* off-diagonal portion of A */ 5212 for (j=0; j<ncols_o; j++,k++) { 5213 cj[k] = dn + *bj++; 5214 ca[k] = *ba++; 5215 } 5216 } 5217 /* put together the new matrix */ 5218 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc);CHKERRQ(ierr); 5219 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5220 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5221 c = (Mat_SeqAIJ*)(*A_loc)->data; 5222 c->free_a = PETSC_TRUE; 5223 c->free_ij = PETSC_TRUE; 5224 c->nonew = 0; 5225 ierr = MatSetType(*A_loc,((PetscObject)Ad)->type_name);CHKERRQ(ierr); 5226 } else if (scall == MAT_REUSE_MATRIX) { 5227 #if defined(PETSC_HAVE_DEVICE) 5228 (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU; 5229 #endif 5230 c = (Mat_SeqAIJ*)(*A_loc)->data; 5231 ca = c->a; 5232 for (i=0; i<am; i++) { 5233 const PetscInt ncols_d = ai[i+1] - ai[i]; 5234 const PetscInt ncols_o = bi[i+1] - bi[i]; 5235 /* diagonal portion of A */ 5236 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 5237 /* off-diagonal portion of A */ 5238 for (j=0; j<ncols_o; j++) *ca++ = *ba++; 5239 } 5240 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5241 ierr = MatSeqAIJRestoreArrayRead(Ad,&aa);CHKERRQ(ierr); 5242 ierr = MatSeqAIJRestoreArrayRead(Ao,&ba);CHKERRQ(ierr); 5243 if (glob) { 5244 PetscInt cst, *gidx; 5245 5246 ierr = MatGetOwnershipRangeColumn(A,&cst,NULL);CHKERRQ(ierr); 5247 ierr = PetscMalloc1(dn+on,&gidx);CHKERRQ(ierr); 5248 for (i=0; i<dn; i++) gidx[i] = cst + i; 5249 for (i=0; i<on; i++) gidx[i+dn] = cmap[i]; 5250 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob);CHKERRQ(ierr); 5251 } 5252 } 5253 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5254 PetscFunctionReturn(0); 5255 } 5256 5257 /*@C 5258 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5259 5260 Not Collective 5261 5262 Input Parameters: 5263 + A - the matrix 5264 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5265 - row, col - index sets of rows and columns to extract (or NULL) 5266 5267 Output Parameter: 5268 . 
A_loc - the local sequential matrix generated 5269 5270 Level: developer 5271 5272 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5273 5274 @*/ 5275 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5276 { 5277 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5278 PetscErrorCode ierr; 5279 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5280 IS isrowa,iscola; 5281 Mat *aloc; 5282 PetscBool match; 5283 5284 PetscFunctionBegin; 5285 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5286 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5287 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5288 if (!row) { 5289 start = A->rmap->rstart; end = A->rmap->rend; 5290 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5291 } else { 5292 isrowa = *row; 5293 } 5294 if (!col) { 5295 start = A->cmap->rstart; 5296 cmap = a->garray; 5297 nzA = a->A->cmap->n; 5298 nzB = a->B->cmap->n; 5299 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5300 ncols = 0; 5301 for (i=0; i<nzB; i++) { 5302 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5303 else break; 5304 } 5305 imark = i; 5306 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5307 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5308 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5309 } else { 5310 iscola = *col; 5311 } 5312 if (scall != MAT_INITIAL_MATRIX) { 5313 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5314 aloc[0] = *A_loc; 5315 } 5316 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5317 if (!col) { /* attach global id of condensed columns */ 5318 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5319 } 5320 *A_loc = aloc[0]; 5321 ierr = PetscFree(aloc);CHKERRQ(ierr); 5322 if (!row) { 5323 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5324 } 5325 if (!col) { 5326 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5327 } 5328 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5329 PetscFunctionReturn(0); 5330 } 5331 5332 /* 5333 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5334 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5335 * on a global size. 
5336 * */ 5337 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5338 { 5339 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5340 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5341 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5342 PetscMPIInt owner; 5343 PetscSFNode *iremote,*oiremote; 5344 const PetscInt *lrowindices; 5345 PetscErrorCode ierr; 5346 PetscSF sf,osf; 5347 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5348 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5349 MPI_Comm comm; 5350 ISLocalToGlobalMapping mapping; 5351 5352 PetscFunctionBegin; 5353 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr); 5354 /* plocalsize is the number of roots 5355 * nrows is the number of leaves 5356 * */ 5357 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr); 5358 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr); 5359 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr); 5360 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr); 5361 for (i=0;i<nrows;i++) { 5362 /* Find a remote index and an owner for a row 5363 * The row could be local or remote 5364 * */ 5365 owner = 0; 5366 lidx = 0; 5367 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr); 5368 iremote[i].index = lidx; 5369 iremote[i].rank = owner; 5370 } 5371 /* Create SF to communicate how many nonzero columns for each row */ 5372 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5373 /* SF will figure out the number of nonzero colunms for each row, and their 5374 * offsets 5375 * */ 5376 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5377 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5378 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5379 5380 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); 5381 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); 5382 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); 5383 roffsets[0] = 0; 5384 roffsets[1] = 0; 5385 for (i=0;i<plocalsize;i++) { 5386 /* diag */ 5387 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5388 /* off diag */ 5389 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5390 /* compute offsets so that we relative location for each row */ 5391 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5392 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5393 } 5394 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr); 5395 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr); 5396 /* 'r' means root, and 'l' means leaf */ 5397 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5398 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5399 ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5400 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5401 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5402 ierr = PetscFree(roffsets);CHKERRQ(ierr); 5403 ierr = PetscFree(nrcols);CHKERRQ(ierr); 5404 dntotalcols = 0; 5405 ontotalcols = 0; 5406 ncol = 0; 5407 for (i=0;i<nrows;i++) { 5408 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5409 ncol = PetscMax(pnnz[i],ncol); 5410 /* diag */ 5411 dntotalcols += nlcols[i*2+0]; 5412 /* off diag */ 5413 ontotalcols += nlcols[i*2+1]; 5414 } 5415 /* We do not need to figure the right number of columns 5416 * since all the calculations will be done by going through the raw data 5417 * */ 5418 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr); 5419 ierr = MatSetUp(*P_oth);CHKERRQ(ierr); 5420 ierr = 
PetscFree(pnnz);CHKERRQ(ierr); 5421 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5422 /* diag */ 5423 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr); 5424 /* off diag */ 5425 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr); 5426 /* diag */ 5427 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr); 5428 /* off diag */ 5429 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr); 5430 dntotalcols = 0; 5431 ontotalcols = 0; 5432 ntotalcols = 0; 5433 for (i=0;i<nrows;i++) { 5434 owner = 0; 5435 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr); 5436 /* Set iremote for diag matrix */ 5437 for (j=0;j<nlcols[i*2+0];j++) { 5438 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5439 iremote[dntotalcols].rank = owner; 5440 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5441 ilocal[dntotalcols++] = ntotalcols++; 5442 } 5443 /* off diag */ 5444 for (j=0;j<nlcols[i*2+1];j++) { 5445 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5446 oiremote[ontotalcols].rank = owner; 5447 oilocal[ontotalcols++] = ntotalcols++; 5448 } 5449 } 5450 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr); 5451 ierr = PetscFree(loffsets);CHKERRQ(ierr); 5452 ierr = PetscFree(nlcols);CHKERRQ(ierr); 5453 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5454 /* P serves as roots and P_oth is leaves 5455 * Diag matrix 5456 * */ 5457 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5458 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5459 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5460 5461 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr); 5462 /* Off diag */ 5463 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5464 ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr); 5465 ierr = PetscSFSetUp(osf);CHKERRQ(ierr); 5466 /* We operate on the matrix internal data for saving memory */ 5467 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5468 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5469 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr); 5470 /* Convert to global indices for diag matrix */ 5471 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5472 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5473 /* We want P_oth store global indices */ 5474 ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr); 5475 /* Use memory scalable approach */ 5476 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr); 5477 ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); 5478 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5479 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5480 /* Convert back to local indices */ 5481 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5482 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5483 nout = 0; 5484 ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr); 5485 if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout); 5486 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 5487 /* Exchange values */ 5488 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5489 ierr = 
PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5490 /* Stop PETSc from shrinking memory */ 5491 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5492 ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5493 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5494 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5495 ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5496 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5497 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5498 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5499 PetscFunctionReturn(0); 5500 } 5501 5502 /* 5503 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5504 * This supports MPIAIJ and MAIJ 5505 * */ 5506 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5507 { 5508 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5509 Mat_SeqAIJ *p_oth; 5510 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 5511 IS rows,map; 5512 PetscHMapI hamp; 5513 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5514 MPI_Comm comm; 5515 PetscSF sf,osf; 5516 PetscBool has; 5517 PetscErrorCode ierr; 5518 5519 PetscFunctionBegin; 5520 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5521 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5522 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5523 * and then create a submatrix (that often is an overlapping matrix) 5524 * */ 5525 if (reuse == MAT_INITIAL_MATRIX) { 5526 /* Use a hash table to figure out unique keys */ 5527 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5528 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5529 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5530 count = 0; 5531 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5532 for (i=0;i<a->B->cmap->n;i++) { 5533 key = a->garray[i]/dof; 5534 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5535 if (!has) { 5536 mapping[i] = count; 5537 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5538 } else { 5539 /* Current 'i' has the same value the previous step */ 5540 mapping[i] = count-1; 5541 } 5542 } 5543 ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5544 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5545 if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr); 5546 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5547 off = 0; 5548 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5549 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5550 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5551 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5552 /* In case, the matrix was already created but users want to recreate the matrix */ 5553 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5554 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5555 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5556 ierr = ISDestroy(&map);CHKERRQ(ierr); 5557 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5558 } else if (reuse == MAT_REUSE_MATRIX) { 5559 /* If matrix was already created, we simply update values using SF objects 5560 * that as 
attached to the matrix ealier. 5561 * */ 5562 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5563 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5564 if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet"); 5565 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5566 /* Update values in place */ 5567 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5568 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5569 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5570 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5571 } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type"); 5572 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5573 PetscFunctionReturn(0); 5574 } 5575 5576 /*@C 5577 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5578 5579 Collective on Mat 5580 5581 Input Parameters: 5582 + A,B - the matrices in mpiaij format 5583 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5584 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5585 5586 Output Parameter: 5587 + rowb, colb - index sets of rows and columns of B to extract 5588 - B_seq - the sequential matrix generated 5589 5590 Level: developer 5591 5592 @*/ 5593 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5594 { 5595 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5596 PetscErrorCode ierr; 5597 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5598 IS isrowb,iscolb; 5599 Mat *bseq=NULL; 5600 5601 PetscFunctionBegin; 5602 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5603 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5604 } 5605 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5606 5607 if (scall == MAT_INITIAL_MATRIX) { 5608 start = A->cmap->rstart; 5609 cmap = a->garray; 5610 nzA = a->A->cmap->n; 5611 nzB = a->B->cmap->n; 5612 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5613 ncols = 0; 5614 for (i=0; i<nzB; i++) { /* row < local row index */ 5615 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5616 else break; 5617 } 5618 imark = i; 5619 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5620 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5621 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5622 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5623 } else { 5624 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5625 isrowb = *rowb; iscolb = *colb; 5626 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5627 bseq[0] = *B_seq; 5628 } 5629 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5630 *B_seq = bseq[0]; 5631 ierr = PetscFree(bseq);CHKERRQ(ierr); 5632 if (!rowb) { 5633 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5634 } else { 5635 *rowb = isrowb; 5636 } 5637 if (!colb) { 5638 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5639 } else { 5640 *colb = iscolb; 5641 } 5642 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5643 PetscFunctionReturn(0); 5644 } 5645 5646 /* 5647 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by 
taking rows of B that equal to nonzero columns 5648 of the OFF-DIAGONAL portion of local A 5649 5650 Collective on Mat 5651 5652 Input Parameters: 5653 + A,B - the matrices in mpiaij format 5654 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5655 5656 Output Parameter: 5657 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5658 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5659 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5660 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5661 5662 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5663 for this matrix. This is not desirable.. 5664 5665 Level: developer 5666 5667 */ 5668 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5669 { 5670 PetscErrorCode ierr; 5671 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5672 Mat_SeqAIJ *b_oth; 5673 VecScatter ctx; 5674 MPI_Comm comm; 5675 const PetscMPIInt *rprocs,*sprocs; 5676 const PetscInt *srow,*rstarts,*sstarts; 5677 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5678 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5679 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5680 MPI_Request *rwaits = NULL,*swaits = NULL; 5681 MPI_Status rstatus; 5682 PetscMPIInt size,tag,rank,nsends_mpi,nrecvs_mpi; 5683 PETSC_UNUSED PetscMPIInt jj; 5684 5685 PetscFunctionBegin; 5686 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5687 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 5688 5689 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5690 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5691 } 5692 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5693 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 5694 5695 if (size == 1) { 5696 startsj_s = NULL; 5697 bufa_ptr = NULL; 5698 *B_oth = NULL; 5699 PetscFunctionReturn(0); 5700 } 5701 5702 ctx = a->Mvctx; 5703 tag = ((PetscObject)ctx)->tag; 5704 5705 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5706 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5707 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5708 ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5709 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5710 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5711 5712 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5713 if (scall == MAT_INITIAL_MATRIX) { 5714 /* i-array */ 5715 /*---------*/ 5716 /* post receives */ 5717 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5718 for (i=0; i<nrecvs; i++) { 5719 rowlen = rvalues + rstarts[i]*rbs; 5720 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5721 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5722 } 5723 5724 /* pack 
the outgoing message */ 5725 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5726 5727 sstartsj[0] = 0; 5728 rstartsj[0] = 0; 5729 len = 0; /* total length of j or a array to be sent */ 5730 if (nsends) { 5731 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5732 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5733 } 5734 for (i=0; i<nsends; i++) { 5735 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5736 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5737 for (j=0; j<nrows; j++) { 5738 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5739 for (l=0; l<sbs; l++) { 5740 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5741 5742 rowlen[j*sbs+l] = ncols; 5743 5744 len += ncols; 5745 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5746 } 5747 k++; 5748 } 5749 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5750 5751 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5752 } 5753 /* recvs and sends of i-array are completed */ 5754 i = nrecvs; 5755 while (i--) { 5756 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr); 5757 } 5758 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5759 ierr = PetscFree(svalues);CHKERRQ(ierr); 5760 5761 /* allocate buffers for sending j and a arrays */ 5762 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5763 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5764 5765 /* create i-array of B_oth */ 5766 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5767 5768 b_othi[0] = 0; 5769 len = 0; /* total length of j or a array to be received */ 5770 k = 0; 5771 for (i=0; i<nrecvs; i++) { 5772 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5773 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5774 for (j=0; j<nrows; j++) { 5775 b_othi[k+1] = b_othi[k] + rowlen[j]; 5776 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5777 k++; 5778 } 5779 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5780 } 5781 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5782 5783 /* allocate space for j and a arrrays of B_oth */ 5784 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5785 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5786 5787 /* j-array */ 5788 /*---------*/ 5789 /* post receives of j-array */ 5790 for (i=0; i<nrecvs; i++) { 5791 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5792 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5793 } 5794 5795 /* pack the outgoing message j-array */ 5796 if (nsends) k = sstarts[0]; 5797 for (i=0; i<nsends; i++) { 5798 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5799 bufJ = bufj+sstartsj[i]; 5800 for (j=0; j<nrows; j++) { 5801 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5802 for (ll=0; ll<sbs; ll++) { 5803 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5804 for (l=0; l<ncols; l++) { 5805 *bufJ++ = cols[l]; 5806 } 5807 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5808 } 5809 } 5810 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5811 } 5812 5813 /* recvs and sends of j-array are completed */ 5814 i = nrecvs; 5815 while (i--) { 5816 ierr = 
MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr); 5817 } 5818 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5819 } else if (scall == MAT_REUSE_MATRIX) { 5820 sstartsj = *startsj_s; 5821 rstartsj = *startsj_r; 5822 bufa = *bufa_ptr; 5823 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5824 b_otha = b_oth->a; 5825 #if defined(PETSC_HAVE_DEVICE) 5826 (*B_oth)->offloadmask = PETSC_OFFLOAD_CPU; 5827 #endif 5828 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5829 5830 /* a-array */ 5831 /*---------*/ 5832 /* post receives of a-array */ 5833 for (i=0; i<nrecvs; i++) { 5834 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5835 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5836 } 5837 5838 /* pack the outgoing message a-array */ 5839 if (nsends) k = sstarts[0]; 5840 for (i=0; i<nsends; i++) { 5841 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5842 bufA = bufa+sstartsj[i]; 5843 for (j=0; j<nrows; j++) { 5844 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5845 for (ll=0; ll<sbs; ll++) { 5846 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5847 for (l=0; l<ncols; l++) { 5848 *bufA++ = vals[l]; 5849 } 5850 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5851 } 5852 } 5853 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5854 } 5855 /* recvs and sends of a-array are completed */ 5856 i = nrecvs; 5857 while (i--) { 5858 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr); 5859 } 5860 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5861 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5862 5863 if (scall == MAT_INITIAL_MATRIX) { 5864 /* put together the new matrix */ 5865 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5866 5867 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5868 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5869 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5870 b_oth->free_a = PETSC_TRUE; 5871 b_oth->free_ij = PETSC_TRUE; 5872 b_oth->nonew = 0; 5873 5874 ierr = PetscFree(bufj);CHKERRQ(ierr); 5875 if (!startsj_s || !bufa_ptr) { 5876 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5877 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5878 } else { 5879 *startsj_s = sstartsj; 5880 *startsj_r = rstartsj; 5881 *bufa_ptr = bufa; 5882 } 5883 } 5884 5885 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5886 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5887 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5888 PetscFunctionReturn(0); 5889 } 5890 5891 /*@C 5892 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5893 5894 Not Collective 5895 5896 Input Parameters: 5897 . A - The matrix in mpiaij format 5898 5899 Output Parameter: 5900 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5901 . 
colmap - A map from global column index to local index into lvec 5902 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5903 5904 Level: developer 5905 5906 @*/ 5907 #if defined(PETSC_USE_CTABLE) 5908 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5909 #else 5910 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5911 #endif 5912 { 5913 Mat_MPIAIJ *a; 5914 5915 PetscFunctionBegin; 5916 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5917 PetscValidPointer(lvec, 2); 5918 PetscValidPointer(colmap, 3); 5919 PetscValidPointer(multScatter, 4); 5920 a = (Mat_MPIAIJ*) A->data; 5921 if (lvec) *lvec = a->lvec; 5922 if (colmap) *colmap = a->colmap; 5923 if (multScatter) *multScatter = a->Mvctx; 5924 PetscFunctionReturn(0); 5925 } 5926 5927 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5928 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5929 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5930 #if defined(PETSC_HAVE_MKL_SPARSE) 5931 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5932 #endif 5933 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5934 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5935 #if defined(PETSC_HAVE_ELEMENTAL) 5936 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5937 #endif 5938 #if defined(PETSC_HAVE_SCALAPACK) 5939 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 5940 #endif 5941 #if defined(PETSC_HAVE_HYPRE) 5942 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5943 #endif 5944 #if defined(PETSC_HAVE_CUDA) 5945 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 5946 #endif 5947 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 5948 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*); 5949 #endif 5950 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5951 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5952 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 5953 5954 /* 5955 Computes (B'*A')' since computing B*A directly is untenable 5956 5957 n p p 5958 [ ] [ ] [ ] 5959 m [ A ] * n [ B ] = m [ C ] 5960 [ ] [ ] [ ] 5961 5962 */ 5963 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5964 { 5965 PetscErrorCode ierr; 5966 Mat At,Bt,Ct; 5967 5968 PetscFunctionBegin; 5969 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5970 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5971 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr); 5972 ierr = MatDestroy(&At);CHKERRQ(ierr); 5973 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5974 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5975 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5976 PetscFunctionReturn(0); 5977 } 5978 5979 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 5980 { 5981 PetscErrorCode ierr; 5982 PetscBool cisdense; 5983 5984 PetscFunctionBegin; 5985 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5986 ierr = 
MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 5987 ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 5988 ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr); 5989 if (!cisdense) { 5990 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 5991 } 5992 ierr = MatSetUp(C);CHKERRQ(ierr); 5993 5994 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5995 PetscFunctionReturn(0); 5996 } 5997 5998 /* ----------------------------------------------------------------*/ 5999 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6000 { 6001 Mat_Product *product = C->product; 6002 Mat A = product->A,B=product->B; 6003 6004 PetscFunctionBegin; 6005 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 6006 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 6007 6008 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6009 C->ops->productsymbolic = MatProductSymbolic_AB; 6010 PetscFunctionReturn(0); 6011 } 6012 6013 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6014 { 6015 PetscErrorCode ierr; 6016 Mat_Product *product = C->product; 6017 6018 PetscFunctionBegin; 6019 if (product->type == MATPRODUCT_AB) { 6020 ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr); 6021 } 6022 PetscFunctionReturn(0); 6023 } 6024 /* ----------------------------------------------------------------*/ 6025 6026 /*MC 6027 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6028 6029 Options Database Keys: 6030 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 6031 6032 Level: beginner 6033 6034 Notes: 6035 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values, 6036 in this case the values associated with the rows and columns one passes in are set to zero 6037 in the matrix 6038 6039 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. 
In this no 6040 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 6041 6042 .seealso: MatCreateAIJ() 6043 M*/ 6044 6045 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6046 { 6047 Mat_MPIAIJ *b; 6048 PetscErrorCode ierr; 6049 PetscMPIInt size; 6050 6051 PetscFunctionBegin; 6052 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 6053 6054 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 6055 B->data = (void*)b; 6056 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 6057 B->assembled = PETSC_FALSE; 6058 B->insertmode = NOT_SET_VALUES; 6059 b->size = size; 6060 6061 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr); 6062 6063 /* build cache for off array entries formed */ 6064 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 6065 6066 b->donotstash = PETSC_FALSE; 6067 b->colmap = NULL; 6068 b->garray = NULL; 6069 b->roworiented = PETSC_TRUE; 6070 6071 /* stuff used for matrix vector multiply */ 6072 b->lvec = NULL; 6073 b->Mvctx = NULL; 6074 6075 /* stuff for MatGetRow() */ 6076 b->rowindices = NULL; 6077 b->rowvalues = NULL; 6078 b->getrowactive = PETSC_FALSE; 6079 6080 /* flexible pointer used in CUSPARSE classes */ 6081 b->spptr = NULL; 6082 6083 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 6084 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 6085 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 6086 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 6087 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 6088 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 6089 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 6090 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 6091 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 6092 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr); 6093 #if defined(PETSC_HAVE_CUDA) 6094 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr); 6095 #endif 6096 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6097 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr); 6098 #endif 6099 #if defined(PETSC_HAVE_MKL_SPARSE) 6100 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 6101 #endif 6102 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 6103 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr); 6104 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 6105 #if defined(PETSC_HAVE_ELEMENTAL) 6106 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 6107 #endif 6108 #if defined(PETSC_HAVE_SCALAPACK) 6109 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr); 6110 #endif 6111 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 6112 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 6113 #if defined(PETSC_HAVE_HYPRE) 6114 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 6115 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr); 6116 #endif 6117 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr); 6118 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr); 6119 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 6120 PetscFunctionReturn(0); 6121 } 6122 6123 /*@C 6124 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 6125 and "off-diagonal" part of the matrix in CSR format. 6126 6127 Collective 6128 6129 Input Parameters: 6130 + comm - MPI communicator 6131 . m - number of local rows (Cannot be PETSC_DECIDE) 6132 . n - This value should be the same as the local size used in creating the 6133 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 6134 calculated if N is given) For square matrices n is almost always m. 6135 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 6136 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 6137 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6138 . j - column indices 6139 . a - matrix values 6140 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6141 . oj - column indices 6142 - oa - matrix values 6143 6144 Output Parameter: 6145 . mat - the matrix 6146 6147 Level: advanced 6148 6149 Notes: 6150 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6151 must free the arrays once the matrix has been destroyed and not before. 6152 6153 The i and j indices are 0 based 6154 6155 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6156 6157 This sets local rows and cannot be used to set off-processor values. 6158 6159 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6160 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6161 not easily support in-place reassembly. 
It is recommended to use MatSetValues() (or a variant thereof) because 6162 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6163 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6164 communication if it is known that only local entries will be set. 6165 6166 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 6167 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 6168 @*/ 6169 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 6170 { 6171 PetscErrorCode ierr; 6172 Mat_MPIAIJ *maij; 6173 6174 PetscFunctionBegin; 6175 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6176 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 6177 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 6178 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 6179 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 6180 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 6181 maij = (Mat_MPIAIJ*) (*mat)->data; 6182 6183 (*mat)->preallocated = PETSC_TRUE; 6184 6185 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 6186 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 6187 6188 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 6189 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 6190 6191 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6192 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6193 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6194 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6195 6196 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 6197 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6198 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6199 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 6200 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6201 PetscFunctionReturn(0); 6202 } 6203 6204 /* 6205 Special version for direct calls from Fortran 6206 */ 6207 #include <petsc/private/fortranimpl.h> 6208 6209 /* Change these macros so can be used in void function */ 6210 #undef CHKERRQ 6211 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 6212 #undef SETERRQ2 6213 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 6214 #undef SETERRQ3 6215 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 6216 #undef SETERRQ 6217 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 6218 6219 #if defined(PETSC_HAVE_FORTRAN_CAPS) 6220 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 6221 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 6222 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 6223 #else 6224 #endif 6225 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 6226 { 6227 Mat mat = *mmat; 6228 PetscInt m = *mm, n = *mn; 6229 InsertMode addv = *maddv; 6230 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 6231 PetscScalar value; 6232 
PetscErrorCode ierr; 6233 6234 MatCheckPreallocated(mat,1); 6235 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 6236 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 6237 { 6238 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 6239 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 6240 PetscBool roworiented = aij->roworiented; 6241 6242 /* Some Variables required in the macro */ 6243 Mat A = aij->A; 6244 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 6245 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 6246 MatScalar *aa = a->a; 6247 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 6248 Mat B = aij->B; 6249 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 6250 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 6251 MatScalar *ba = b->a; 6252 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 6253 * cannot use "#if defined" inside a macro. */ 6254 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 6255 6256 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 6257 PetscInt nonew = a->nonew; 6258 MatScalar *ap1,*ap2; 6259 6260 PetscFunctionBegin; 6261 for (i=0; i<m; i++) { 6262 if (im[i] < 0) continue; 6263 if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 6264 if (im[i] >= rstart && im[i] < rend) { 6265 row = im[i] - rstart; 6266 lastcol1 = -1; 6267 rp1 = aj + ai[row]; 6268 ap1 = aa + ai[row]; 6269 rmax1 = aimax[row]; 6270 nrow1 = ailen[row]; 6271 low1 = 0; 6272 high1 = nrow1; 6273 lastcol2 = -1; 6274 rp2 = bj + bi[row]; 6275 ap2 = ba + bi[row]; 6276 rmax2 = bimax[row]; 6277 nrow2 = bilen[row]; 6278 low2 = 0; 6279 high2 = nrow2; 6280 6281 for (j=0; j<n; j++) { 6282 if (roworiented) value = v[i*n+j]; 6283 else value = v[i+j*m]; 6284 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 6285 if (in[j] >= cstart && in[j] < cend) { 6286 col = in[j] - cstart; 6287 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 6288 #if defined(PETSC_HAVE_DEVICE) 6289 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 6290 #endif 6291 } else if (in[j] < 0) continue; 6292 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 6293 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */ 6294 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 6295 } else { 6296 if (mat->was_assembled) { 6297 if (!aij->colmap) { 6298 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 6299 } 6300 #if defined(PETSC_USE_CTABLE) 6301 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 6302 col--; 6303 #else 6304 col = aij->colmap[in[j]] - 1; 6305 #endif 6306 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 6307 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 6308 col = in[j]; 6309 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 6310 B = aij->B; 6311 b = (Mat_SeqAIJ*)B->data; 6312 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 6313 rp2 = bj + bi[row]; 6314 ap2 = ba + bi[row]; 6315 rmax2 = bimax[row]; 6316 nrow2 = bilen[row]; 6317 low2 = 
0; 6318 high2 = nrow2; 6319 bm = aij->B->rmap->n; 6320 ba = b->a; 6321 inserted = PETSC_FALSE; 6322 } 6323 } else col = in[j]; 6324 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 6325 #if defined(PETSC_HAVE_DEVICE) 6326 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 6327 #endif 6328 } 6329 } 6330 } else if (!aij->donotstash) { 6331 if (roworiented) { 6332 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6333 } else { 6334 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6335 } 6336 } 6337 } 6338 } 6339 PetscFunctionReturnVoid(); 6340 } 6341 6342 typedef struct { 6343 Mat *mp; /* intermediate products */ 6344 PetscBool *mptmp; /* is the intermediate product temporary ? */ 6345 PetscInt cp; /* number of intermediate products */ 6346 6347 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6348 PetscInt *startsj_s,*startsj_r; 6349 PetscScalar *bufa; 6350 Mat P_oth; 6351 6352 /* may take advantage of merging product->B */ 6353 Mat Bloc; 6354 6355 /* cusparse does not have support to split between symbolic and numeric phases 6356 When api_user is true, we don't need to update the numerical values 6357 of the temporary storage */ 6358 PetscBool reusesym; 6359 6360 /* support for COO values insertion */ 6361 PetscScalar *coo_v,*coo_w; 6362 PetscInt **own; 6363 PetscInt **off; 6364 PetscBool hasoffproc; /* if true, non-local values insertion (i.e. AtB or PtAP) */ 6365 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6366 PetscMemType mtype; 6367 6368 /* customization */ 6369 PetscBool abmerge; 6370 PetscBool P_oth_bind; 6371 } MatMatMPIAIJBACKEND; 6372 6373 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6374 { 6375 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data; 6376 PetscInt i; 6377 PetscErrorCode ierr; 6378 6379 PetscFunctionBegin; 6380 ierr = PetscFree2(mmdata->startsj_s,mmdata->startsj_r);CHKERRQ(ierr); 6381 ierr = PetscFree(mmdata->bufa);CHKERRQ(ierr); 6382 ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v);CHKERRQ(ierr); 6383 ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w);CHKERRQ(ierr); 6384 ierr = MatDestroy(&mmdata->P_oth);CHKERRQ(ierr); 6385 ierr = MatDestroy(&mmdata->Bloc);CHKERRQ(ierr); 6386 ierr = PetscSFDestroy(&mmdata->sf);CHKERRQ(ierr); 6387 for (i = 0; i < mmdata->cp; i++) { 6388 ierr = MatDestroy(&mmdata->mp[i]);CHKERRQ(ierr); 6389 } 6390 ierr = PetscFree(mmdata->mp);CHKERRQ(ierr); 6391 ierr = PetscFree(mmdata->mptmp);CHKERRQ(ierr); 6392 ierr = PetscFree(mmdata->own[0]);CHKERRQ(ierr); 6393 ierr = PetscFree(mmdata->own);CHKERRQ(ierr); 6394 ierr = PetscFree(mmdata->off[0]);CHKERRQ(ierr); 6395 ierr = PetscFree(mmdata->off);CHKERRQ(ierr); 6396 ierr = PetscFree(mmdata);CHKERRQ(ierr); 6397 PetscFunctionReturn(0); 6398 } 6399 6400 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 6401 { 6402 PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]); 6403 PetscErrorCode ierr; 6404 6405 PetscFunctionBegin; 6406 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f);CHKERRQ(ierr); 6407 if (f) { 6408 ierr = (*f)(A,n,idx,v);CHKERRQ(ierr); 6409 } else { 6410 const PetscScalar *vv; 6411 6412 ierr = MatSeqAIJGetArrayRead(A,&vv);CHKERRQ(ierr); 6413 if (n && idx) { 6414 PetscScalar *w = v; 6415 const PetscInt *oi = idx; 
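/* host fallback used when no type-specific MatSeqAIJCopySubArray_C implementation is composed:
   gather the selected entries, w[k] = vv[idx[k]] for k = 0,...,n-1 */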
6416 PetscInt j; 6417 6418 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 6419 } else { 6420 ierr = PetscArraycpy(v,vv,n);CHKERRQ(ierr); 6421 } 6422 ierr = MatSeqAIJRestoreArrayRead(A,&vv);CHKERRQ(ierr); 6423 } 6424 PetscFunctionReturn(0); 6425 } 6426 6427 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 6428 { 6429 MatMatMPIAIJBACKEND *mmdata; 6430 PetscInt i,n_d,n_o; 6431 PetscErrorCode ierr; 6432 6433 PetscFunctionBegin; 6434 MatCheckProduct(C,1); 6435 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 6436 mmdata = (MatMatMPIAIJBACKEND*)C->product->data; 6437 if (!mmdata->reusesym) { /* update temporary matrices */ 6438 if (mmdata->P_oth) { 6439 ierr = MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 6440 } 6441 if (mmdata->Bloc) { 6442 ierr = MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc);CHKERRQ(ierr); 6443 } 6444 } 6445 mmdata->reusesym = PETSC_FALSE; 6446 6447 for (i = 0; i < mmdata->cp; i++) { 6448 if (!mmdata->mp[i]->ops->productnumeric) SETERRQ1(PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]); 6449 ierr = (*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]);CHKERRQ(ierr); 6450 } 6451 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 6452 PetscInt noff = mmdata->off[i+1] - mmdata->off[i]; 6453 6454 if (mmdata->mptmp[i]) continue; 6455 if (noff) { 6456 PetscInt nown = mmdata->own[i+1] - mmdata->own[i]; 6457 6458 ierr = MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o);CHKERRQ(ierr); 6459 ierr = MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d);CHKERRQ(ierr); 6460 n_o += noff; 6461 n_d += nown; 6462 } else { 6463 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data; 6464 6465 ierr = MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d);CHKERRQ(ierr); 6466 n_d += mm->nz; 6467 } 6468 } 6469 if (mmdata->hasoffproc) { /* offprocess insertion */ 6470 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr); 6471 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr); 6472 } 6473 ierr = MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES);CHKERRQ(ierr); 6474 PetscFunctionReturn(0); 6475 } 6476 6477 /* Support for Pt * A, A * P, or Pt * A * P */ 6478 #define MAX_NUMBER_INTERMEDIATE 4 6479 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 6480 { 6481 Mat_Product *product = C->product; 6482 Mat A,P,mp[MAX_NUMBER_INTERMEDIATE]; 6483 Mat_MPIAIJ *a,*p; 6484 MatMatMPIAIJBACKEND *mmdata; 6485 ISLocalToGlobalMapping P_oth_l2g = NULL; 6486 IS glob = NULL; 6487 const char *prefix; 6488 char pprefix[256]; 6489 const PetscInt *globidx,*P_oth_idx; 6490 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; 6491 PetscInt cp = 0,m,n,M,N,ncoo,ncoo_d,ncoo_o,ncoo_oown,*coo_i,*coo_j,cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE],i,j; 6492 MatProductType ptype; 6493 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk; 6494 PetscMPIInt size; 6495 PetscErrorCode ierr; 6496 6497 PetscFunctionBegin; 6498 MatCheckProduct(C,1); 6499 if (product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 6500 ptype = product->type; 6501 if (product->A->symmetric && ptype == MATPRODUCT_AtB) ptype = 
MATPRODUCT_AB; 6502 switch (ptype) { 6503 case MATPRODUCT_AB: 6504 A = product->A; 6505 P = product->B; 6506 m = A->rmap->n; 6507 n = P->cmap->n; 6508 M = A->rmap->N; 6509 N = P->cmap->N; 6510 break; 6511 case MATPRODUCT_AtB: 6512 P = product->A; 6513 A = product->B; 6514 m = P->cmap->n; 6515 n = A->cmap->n; 6516 M = P->cmap->N; 6517 N = A->cmap->N; 6518 hasoffproc = PETSC_TRUE; 6519 break; 6520 case MATPRODUCT_PtAP: 6521 A = product->A; 6522 P = product->B; 6523 m = P->cmap->n; 6524 n = P->cmap->n; 6525 M = P->cmap->N; 6526 N = P->cmap->N; 6527 hasoffproc = PETSC_TRUE; 6528 break; 6529 default: 6530 SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 6531 } 6532 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)C),&size);CHKERRQ(ierr); 6533 if (size == 1) hasoffproc = PETSC_FALSE; 6534 6535 /* defaults */ 6536 for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) { 6537 mp[i] = NULL; 6538 mptmp[i] = PETSC_FALSE; 6539 rmapt[i] = -1; 6540 cmapt[i] = -1; 6541 rmapa[i] = NULL; 6542 cmapa[i] = NULL; 6543 } 6544 6545 /* customization */ 6546 ierr = PetscNew(&mmdata);CHKERRQ(ierr); 6547 mmdata->reusesym = product->api_user; 6548 if (ptype == MATPRODUCT_AB) { 6549 if (product->api_user) { 6550 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr); 6551 ierr = PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr); 6552 ierr = PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6553 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6554 } else { 6555 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr); 6556 ierr = PetscOptionsBool("-matproduct_ab_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr); 6557 ierr = PetscOptionsBool("-matproduct_ab_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6558 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6559 } 6560 } else if (ptype == MATPRODUCT_PtAP) { 6561 if (product->api_user) { 6562 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr); 6563 ierr = PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6564 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6565 } else { 6566 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr); 6567 ierr = PetscOptionsBool("-matproduct_ptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6568 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6569 } 6570 } 6571 a = (Mat_MPIAIJ*)A->data; 6572 p = (Mat_MPIAIJ*)P->data; 6573 ierr = MatSetSizes(C,m,n,M,N);CHKERRQ(ierr); 6574 ierr = PetscLayoutSetUp(C->rmap);CHKERRQ(ierr); 6575 ierr = PetscLayoutSetUp(C->cmap);CHKERRQ(ierr); 6576 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 6577 ierr = MatGetOptionsPrefix(C,&prefix);CHKERRQ(ierr); 6578 switch (ptype) { 6579 case MATPRODUCT_AB: /* A * P */ 6580 ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 6581 6582 if 
(mmdata->abmerge) { /* A_diag * P_loc and A_off * P_oth */ 6583 /* P is product->B */ 6584 ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 6585 ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6586 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6587 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6588 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6589 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6590 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6591 mp[cp]->product->api_user = product->api_user; 6592 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6593 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6594 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6595 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6596 rmapt[cp] = 1; 6597 cmapt[cp] = 2; 6598 cmapa[cp] = globidx; 6599 mptmp[cp] = PETSC_FALSE; 6600 cp++; 6601 } else { /* A_diag * P_diag and A_diag * P_off and A_off * P_oth */ 6602 ierr = MatProductCreate(a->A,p->A,NULL,&mp[cp]);CHKERRQ(ierr); 6603 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6604 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6605 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6606 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6607 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6608 mp[cp]->product->api_user = product->api_user; 6609 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6610 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6611 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6612 rmapt[cp] = 1; 6613 cmapt[cp] = 1; 6614 mptmp[cp] = PETSC_FALSE; 6615 cp++; 6616 ierr = MatProductCreate(a->A,p->B,NULL,&mp[cp]);CHKERRQ(ierr); 6617 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6618 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6619 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6620 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6621 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6622 mp[cp]->product->api_user = product->api_user; 6623 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6624 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6625 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6626 rmapt[cp] = 1; 6627 cmapt[cp] = 2; 6628 cmapa[cp] = p->garray; 6629 mptmp[cp] = PETSC_FALSE; 6630 cp++; 6631 } 6632 if (mmdata->P_oth) { 6633 ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); 6634 ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 6635 ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr); 6636 ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr); 6637 ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr); 6638 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6639 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6640 ierr = 
PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6641 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6642 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6643 mp[cp]->product->api_user = product->api_user; 6644 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6645 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6646 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6647 rmapt[cp] = 1; 6648 cmapt[cp] = 2; 6649 cmapa[cp] = P_oth_idx; 6650 mptmp[cp] = PETSC_FALSE; 6651 cp++; 6652 } 6653 break; 6654 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 6655 /* A is product->B */ 6656 ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 6657 if (A == P) { 6658 ierr = MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6659 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6660 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6661 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6662 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6663 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6664 mp[cp]->product->api_user = product->api_user; 6665 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6666 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6667 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6668 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6669 rmapt[cp] = 2; 6670 rmapa[cp] = globidx; 6671 cmapt[cp] = 2; 6672 cmapa[cp] = globidx; 6673 mptmp[cp] = PETSC_FALSE; 6674 cp++; 6675 } else { 6676 ierr = MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6677 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6678 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6679 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6680 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6681 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6682 mp[cp]->product->api_user = product->api_user; 6683 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6684 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6685 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6686 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6687 rmapt[cp] = 1; 6688 cmapt[cp] = 2; 6689 cmapa[cp] = globidx; 6690 mptmp[cp] = PETSC_FALSE; 6691 cp++; 6692 ierr = MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6693 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6694 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6695 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6696 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6697 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6698 mp[cp]->product->api_user = product->api_user; 6699 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6700 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6701 ierr = 
(*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6702 rmapt[cp] = 2; 6703 rmapa[cp] = p->garray; 6704 cmapt[cp] = 2; 6705 cmapa[cp] = globidx; 6706 mptmp[cp] = PETSC_FALSE; 6707 cp++; 6708 } 6709 break; 6710 case MATPRODUCT_PtAP: 6711 ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 6712 /* P is product->B */ 6713 ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 6714 ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6715 ierr = MatProductSetType(mp[cp],MATPRODUCT_PtAP);CHKERRQ(ierr); 6716 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6717 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6718 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6719 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6720 mp[cp]->product->api_user = product->api_user; 6721 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6722 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6723 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6724 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6725 rmapt[cp] = 2; 6726 rmapa[cp] = globidx; 6727 cmapt[cp] = 2; 6728 cmapa[cp] = globidx; 6729 mptmp[cp] = PETSC_FALSE; 6730 cp++; 6731 if (mmdata->P_oth) { 6732 ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); 6733 ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 6734 ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr); 6735 ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr); 6736 ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr); 6737 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6738 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6739 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6740 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6741 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6742 mp[cp]->product->api_user = product->api_user; 6743 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6744 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6745 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6746 mptmp[cp] = PETSC_TRUE; 6747 cp++; 6748 ierr = MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]);CHKERRQ(ierr); 6749 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6750 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6751 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6752 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6753 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6754 mp[cp]->product->api_user = product->api_user; 6755 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6756 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6757 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6758 rmapt[cp] = 2; 6759 rmapa[cp] = globidx; 6760 cmapt[cp] = 2; 6761 cmapa[cp] = P_oth_idx; 6762 mptmp[cp] = PETSC_FALSE; 6763 cp++; 6764 } 
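/* At this point the PtAP product has been split into Bloc^T * a->A * Bloc (mp[0]) and, when P_oth
       is nonempty, Bloc^T * (a->B * P_oth) (mp[2], with mp[1] = a->B * P_oth kept only as a temporary);
       Bloc holds the merged local rows of P, and the contributions are combined below through the
       COO preallocation and value-insertion path. */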
6765 break; 6766 default: 6767 SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 6768 } 6769 /* sanity check */ 6770 if (size > 1) for (i = 0; i < cp; i++) if (rmapt[i] == 2 && !hasoffproc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %D",i); 6771 6772 ierr = PetscMalloc1(cp,&mmdata->mp);CHKERRQ(ierr); 6773 for (i = 0; i < cp; i++) mmdata->mp[i] = mp[i]; 6774 ierr = PetscMalloc1(cp,&mmdata->mptmp);CHKERRQ(ierr); 6775 for (i = 0; i < cp; i++) mmdata->mptmp[i] = mptmp[i]; 6776 mmdata->cp = cp; 6777 C->product->data = mmdata; 6778 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 6779 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 6780 6781 /* memory type */ 6782 mmdata->mtype = PETSC_MEMTYPE_HOST; 6783 ierr = PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");CHKERRQ(ierr); 6784 ierr = PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"");CHKERRQ(ierr); 6785 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 6786 // enable the line below MatSeqAIJCopySubArray_SeqAIJKokkos is implemented 6787 //else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_DEVICE; 6788 6789 /* prepare coo coordinates for values insertion */ 6790 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 6791 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 6792 if (mptmp[cp]) continue; 6793 if (rmapt[cp] == 2 && hasoffproc) { 6794 const PetscInt *rmap = rmapa[cp]; 6795 const PetscInt mr = mp[cp]->rmap->n; 6796 const PetscInt rs = C->rmap->rstart; 6797 const PetscInt re = C->rmap->rend; 6798 const PetscInt *ii = mm->i; 6799 for (i = 0; i < mr; i++) { 6800 const PetscInt gr = rmap[i]; 6801 const PetscInt nz = ii[i+1] - ii[i]; 6802 if (gr < rs || gr >= re) ncoo_o += nz; 6803 else ncoo_oown += nz; 6804 } 6805 } else ncoo_d += mm->nz; 6806 } 6807 ierr = PetscCalloc1(mmdata->cp+1,&mmdata->off);CHKERRQ(ierr); 6808 ierr = PetscCalloc1(mmdata->cp+1,&mmdata->own);CHKERRQ(ierr); 6809 if (hasoffproc) { /* handle offproc values insertion */ 6810 PetscSF msf; 6811 PetscInt ncoo2,*coo_i2,*coo_j2; 6812 6813 ierr = PetscMalloc1(ncoo_o,&mmdata->off[0]);CHKERRQ(ierr); 6814 ierr = PetscMalloc1(ncoo_oown,&mmdata->own[0]);CHKERRQ(ierr); 6815 ierr = PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j);CHKERRQ(ierr); 6816 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 6817 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 6818 PetscInt *idxoff = mmdata->off[cp]; 6819 PetscInt *idxown = mmdata->own[cp]; 6820 if (!mptmp[cp] && rmapt[cp] == 2) { 6821 const PetscInt *rmap = rmapa[cp]; 6822 const PetscInt *cmap = cmapa[cp]; 6823 const PetscInt *ii = mm->i; 6824 PetscInt *coi = coo_i + ncoo_o; 6825 PetscInt *coj = coo_j + ncoo_o; 6826 const PetscInt mr = mp[cp]->rmap->n; 6827 const PetscInt rs = C->rmap->rstart; 6828 const PetscInt re = C->rmap->rend; 6829 const PetscInt cs = C->cmap->rstart; 6830 for (i = 0; i < mr; i++) { 6831 const PetscInt *jj = mm->j + ii[i]; 6832 const PetscInt gr = rmap[i]; 6833 const PetscInt nz = ii[i+1] - ii[i]; 6834 if (gr < rs || gr >= re) { 6835 for (j = ii[i]; j < ii[i+1]; j++) { 6836 *coi++ = gr; 6837 *idxoff++ = j; 6838 } 6839 if (!cmapt[cp]) { /* already global */ 6840 for (j = 0; j < nz; j++) *coj++ = jj[j]; 6841 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 6842 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 6843 } else { /* offdiag */ 6844 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 6845 } 6846 ncoo_o += nz; 6847 } 
else { 6848 for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j; 6849 } 6850 } 6851 } 6852 mmdata->off[cp + 1] = idxoff; 6853 mmdata->own[cp + 1] = idxown; 6854 } 6855 6856 ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr); 6857 ierr = PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o,NULL,PETSC_OWN_POINTER,coo_i);CHKERRQ(ierr); 6858 ierr = PetscSFGetMultiSF(mmdata->sf,&msf);CHKERRQ(ierr); 6859 ierr = PetscSFGetGraph(msf,&ncoo2,NULL,NULL,NULL);CHKERRQ(ierr); 6860 ncoo = ncoo_d + ncoo_oown + ncoo2; 6861 ierr = PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2);CHKERRQ(ierr); 6862 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 6863 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 6864 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 6865 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 6866 ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr); 6867 ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w);CHKERRQ(ierr); 6868 coo_i = coo_i2; 6869 coo_j = coo_j2; 6870 } else { /* no offproc values insertion */ 6871 ncoo = ncoo_d; 6872 ierr = PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j);CHKERRQ(ierr); 6873 6874 ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr); 6875 ierr = PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER);CHKERRQ(ierr); 6876 ierr = PetscSFSetUp(mmdata->sf);CHKERRQ(ierr); 6877 } 6878 mmdata->hasoffproc = hasoffproc; 6879 6880 /* on-process indices */ 6881 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 6882 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 6883 PetscInt *coi = coo_i + ncoo_d; 6884 PetscInt *coj = coo_j + ncoo_d; 6885 const PetscInt *jj = mm->j; 6886 const PetscInt *ii = mm->i; 6887 const PetscInt *cmap = cmapa[cp]; 6888 const PetscInt *rmap = rmapa[cp]; 6889 const PetscInt mr = mp[cp]->rmap->n; 6890 const PetscInt rs = C->rmap->rstart; 6891 const PetscInt re = C->rmap->rend; 6892 const PetscInt cs = C->cmap->rstart; 6893 6894 if (mptmp[cp]) continue; 6895 if (rmapt[cp] == 1) { 6896 for (i = 0; i < mr; i++) { 6897 const PetscInt gr = i + rs; 6898 for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr; 6899 } 6900 /* columns coo */ 6901 if (!cmapt[cp]) { 6902 ierr = PetscArraycpy(coj,jj,mm->nz);CHKERRQ(ierr); 6903 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 6904 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; 6905 } else { /* offdiag */ 6906 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 6907 } 6908 ncoo_d += mm->nz; 6909 } else if (rmapt[cp] == 2) { 6910 for (i = 0; i < mr; i++) { 6911 const PetscInt *jj = mm->j + ii[i]; 6912 const PetscInt gr = rmap[i]; 6913 const PetscInt nz = ii[i+1] - ii[i]; 6914 if (gr >= rs && gr < re) { 6915 for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr; 6916 if (!cmapt[cp]) { /* already global */ 6917 for (j = 0; j < nz; j++) *coj++ = jj[j]; 6918 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 6919 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 6920 } else { /* offdiag */ 6921 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 6922 } 6923 ncoo_d += nz; 6924 } 6925 } 6926 } 6927 } 6928 if (glob) { 6929 ierr = ISRestoreIndices(glob,&globidx);CHKERRQ(ierr); 6930 } 6931 ierr = ISDestroy(&glob);CHKERRQ(ierr); 6932 if (P_oth_l2g) { 6933 ierr = ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 6934 } 
6935 ierr = ISLocalToGlobalMappingDestroy(&P_oth_l2g);CHKERRQ(ierr); 6936 ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v);CHKERRQ(ierr); 6937 6938 /* preallocate with COO data */ 6939 ierr = MatSetPreallocationCOO(C,ncoo,coo_i,coo_j);CHKERRQ(ierr); 6940 ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr); 6941 PetscFunctionReturn(0); 6942 } 6943 6944 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 6945 { 6946 Mat_Product *product = mat->product; 6947 PetscErrorCode ierr; 6948 #if defined(PETSC_HAVE_DEVICE) 6949 PetscBool match = PETSC_FALSE; 6950 PetscBool usecpu = PETSC_FALSE; 6951 #else 6952 PetscBool match = PETSC_TRUE; 6953 #endif 6954 6955 PetscFunctionBegin; 6956 MatCheckProduct(mat,1); 6957 #if defined(PETSC_HAVE_DEVICE) 6958 if (!product->A->boundtocpu && !product->B->boundtocpu) { 6959 ierr = PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match);CHKERRQ(ierr); 6960 } 6961 if (match) { /* we can always fallback to CPU in case an operation is not performing on the device */ 6962 switch (product->type) { 6963 case MATPRODUCT_AB: 6964 if (product->api_user) { 6965 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");CHKERRQ(ierr); 6966 ierr = PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 6967 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6968 } else { 6969 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr); 6970 ierr = PetscOptionsBool("-matproduct_ab_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 6971 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6972 } 6973 break; 6974 case MATPRODUCT_AtB: 6975 if (product->api_user) { 6976 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr); 6977 ierr = PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 6978 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6979 } else { 6980 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr); 6981 ierr = PetscOptionsBool("-matproduct_atb_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 6982 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6983 } 6984 break; 6985 case MATPRODUCT_PtAP: 6986 if (product->api_user) { 6987 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");CHKERRQ(ierr); 6988 ierr = PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 6989 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6990 } else { 6991 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr); 6992 ierr = PetscOptionsBool("-matproduct_ptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr); 6993 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6994 } 6995 break; 6996 default: 6997 break; 6998 } 6999 match = (PetscBool)!usecpu; 7000 } 7001 #endif 7002 if (match) { 7003 switch (product->type) { 7004 case MATPRODUCT_AB: 7005 case MATPRODUCT_AtB: 7006 case MATPRODUCT_PtAP: 7007 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7008 break; 7009 default: 7010 break; 7011 } 7012 } 7013 /* fallback to MPIAIJ ops */ 7014 if 
(!mat->ops->productsymbolic) { 7015 ierr = MatProductSetFromOptions_MPIAIJ(mat);CHKERRQ(ierr); 7016 } 7017 PetscFunctionReturn(0); 7018 } 7019
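/*
   Minimal usage sketch (illustrative only, not part of this file's API): how a user-level triple
   product reaches the backend routines above. A and P are assumed to be already assembled MATMPIAIJ
   (or device subclass) matrices; the variable names are placeholders.

     Mat C;
     ierr = MatProductCreate(A,P,NULL,&C);CHKERRQ(ierr);
     ierr = MatProductSetType(C,MATPRODUCT_PtAP);CHKERRQ(ierr);
     ierr = MatProductSetFromOptions(C);CHKERRQ(ierr);  // may select MatProductSymbolic_MPIAIJBACKEND
     ierr = MatProductSymbolic(C);CHKERRQ(ierr);
     ierr = MatProductNumeric(C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);

   Running with -matptap_backend_cpu forces the fallback to the CPU MPIAIJ kernels chosen in
   MatProductSetFromOptions_MPIAIJBACKEND() above.
*/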