#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes:
   Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) {
    ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
  }
  if (a->B) {
    ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb,*aav,*bav;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia   = a->i;
  ib   = b->i;
  ierr = MatSeqAIJGetArrayRead(mat->A,&aav);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(mat->B,&bav);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = bav + ib[i];
    for (j=0; j
<nb; j++) { 109 if (bb[j] != 0.0) goto ok1; 110 } 111 cnt++; 112 ok1:; 113 } 114 ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRMPI(ierr); 115 if (!n0rows) { 116 ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr); 117 ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 121 cnt = 0; 122 for (i=0; i<m; i++) { 123 na = ia[i+1] - ia[i]; 124 nb = ib[i+1] - ib[i]; 125 if (!na && !nb) continue; 126 aa = aav + ia[i]; 127 for (j=0; j<na;j++) { 128 if (aa[j] != 0.0) { 129 rows[cnt++] = rstart + i; 130 goto ok2; 131 } 132 } 133 bb = bav + ib[i]; 134 for (j=0; j<nb; j++) { 135 if (bb[j] != 0.0) { 136 rows[cnt++] = rstart + i; 137 goto ok2; 138 } 139 } 140 ok2:; 141 } 142 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 143 ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr); 144 ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr); 145 PetscFunctionReturn(0); 146 } 147 148 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 149 { 150 PetscErrorCode ierr; 151 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 152 PetscBool cong; 153 154 PetscFunctionBegin; 155 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 156 if (Y->assembled && cong) { 157 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 158 } else { 159 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 160 } 161 PetscFunctionReturn(0); 162 } 163 164 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 165 { 166 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 167 PetscErrorCode ierr; 168 PetscInt i,rstart,nrows,*rows; 169 170 PetscFunctionBegin; 171 *zrows = NULL; 172 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 173 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 174 for (i=0; i<nrows; i++) rows[i] += rstart; 175 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 176 PetscFunctionReturn(0); 177 } 178 179 PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions) 180 { 181 PetscErrorCode ierr; 182 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 183 PetscInt i,m,n,*garray = aij->garray; 184 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 185 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 186 PetscReal *work; 187 const PetscScalar *dummy; 188 189 PetscFunctionBegin; 190 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 191 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 192 ierr = MatSeqAIJGetArrayRead(aij->A,&dummy);CHKERRQ(ierr); 193 ierr = MatSeqAIJRestoreArrayRead(aij->A,&dummy);CHKERRQ(ierr); 194 ierr = MatSeqAIJGetArrayRead(aij->B,&dummy);CHKERRQ(ierr); 195 ierr = MatSeqAIJRestoreArrayRead(aij->B,&dummy);CHKERRQ(ierr); 196 if (type == NORM_2) { 197 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 198 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 199 } 200 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 201 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 202 } 203 } else if (type == NORM_1) { 204 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 205 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 206 } 207 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 208 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 209 } 210 } else if (type == NORM_INFINITY) { 211 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 212 work[A->cmap->rstart + 
a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 213 } 214 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 215 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 216 } 217 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 218 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 219 work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 220 } 221 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 222 work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 223 } 224 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 225 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 226 work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 227 } 228 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 229 work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 230 } 231 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type"); 232 if (type == NORM_INFINITY) { 233 ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 234 } else { 235 ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 236 } 237 ierr = PetscFree(work);CHKERRQ(ierr); 238 if (type == NORM_2) { 239 for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 240 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 241 for (i=0; i<n; i++) reductions[i] /= m; 242 } 243 PetscFunctionReturn(0); 244 } 245 246 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 247 { 248 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 249 IS sis,gis; 250 PetscErrorCode ierr; 251 const PetscInt *isis,*igis; 252 PetscInt n,*iis,nsis,ngis,rstart,i; 253 254 PetscFunctionBegin; 255 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 256 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 257 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 258 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 259 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 260 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 261 262 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 263 ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr); 264 ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr); 265 n = ngis + nsis; 266 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 267 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 268 for (i=0; i<n; i++) iis[i] += rstart; 269 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 270 271 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 272 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 273 ierr = ISDestroy(&sis);CHKERRQ(ierr); 274 ierr = ISDestroy(&gis);CHKERRQ(ierr); 275 PetscFunctionReturn(0); 276 } 277 278 /* 279 Local utility routine that creates a mapping from the global column 280 number to the local number in the off-diagonal part of the local 281 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 282 a slightly higher hash table cost; without it it is not scalable (each processor 283 has an order N integer array but is fast to access. 
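) For example (illustrative values), if garray = {3,17,42} then global column 17 maps to local column 1 of the
   off-diagonal block. The map stores the local index plus one (see MatCreateColmap_MPIAIJ_Private() below), so a
   lookup that yields zero means the global column does not occur in the off-diagonal block, and callers subtract
   one after the lookup.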
284 */ 285 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 286 { 287 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 288 PetscErrorCode ierr; 289 PetscInt n = aij->B->cmap->n,i; 290 291 PetscFunctionBegin; 292 if (n && !aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 293 #if defined(PETSC_USE_CTABLE) 294 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 295 for (i=0; i<n; i++) { 296 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 297 } 298 #else 299 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 300 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 301 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 302 #endif 303 PetscFunctionReturn(0); 304 } 305 306 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 307 { \ 308 if (col <= lastcol1) low1 = 0; \ 309 else high1 = nrow1; \ 310 lastcol1 = col;\ 311 while (high1-low1 > 5) { \ 312 t = (low1+high1)/2; \ 313 if (rp1[t] > col) high1 = t; \ 314 else low1 = t; \ 315 } \ 316 for (_i=low1; _i<high1; _i++) { \ 317 if (rp1[_i] > col) break; \ 318 if (rp1[_i] == col) { \ 319 if (addv == ADD_VALUES) { \ 320 ap1[_i] += value; \ 321 /* Not sure LogFlops will slow dow the code or not */ \ 322 (void)PetscLogFlops(1.0); \ 323 } \ 324 else ap1[_i] = value; \ 325 inserted = PETSC_TRUE; \ 326 goto a_noinsert; \ 327 } \ 328 } \ 329 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 330 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 331 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 332 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 333 N = nrow1++ - 1; a->nz++; high1++; \ 334 /* shift up all the later entries in this row */ \ 335 ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\ 336 ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\ 337 rp1[_i] = col; \ 338 ap1[_i] = value; \ 339 A->nonzerostate++;\ 340 a_noinsert: ; \ 341 ailen[row] = nrow1; \ 342 } 343 344 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 345 { \ 346 if (col <= lastcol2) low2 = 0; \ 347 else high2 = nrow2; \ 348 lastcol2 = col; \ 349 while (high2-low2 > 5) { \ 350 t = (low2+high2)/2; \ 351 if (rp2[t] > col) high2 = t; \ 352 else low2 = t; \ 353 } \ 354 for (_i=low2; _i<high2; _i++) { \ 355 if (rp2[_i] > col) break; \ 356 if (rp2[_i] == col) { \ 357 if (addv == ADD_VALUES) { \ 358 ap2[_i] += value; \ 359 (void)PetscLogFlops(1.0); \ 360 } \ 361 else ap2[_i] = value; \ 362 inserted = PETSC_TRUE; \ 363 goto b_noinsert; \ 364 } \ 365 } \ 366 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 367 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 368 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 369 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 370 N = nrow2++ - 1; b->nz++; high2++; \ 371 /* shift up all the later entries in this row */ \ 372 ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\ 373 ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\ 374 rp2[_i] = col; \ 375 ap2[_i] = value; \ 376 B->nonzerostate++; \ 377 b_noinsert: ; \ 
378 bilen[row] = nrow2; \ 379 } 380 381 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 382 { 383 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 384 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 385 PetscErrorCode ierr; 386 PetscInt l,*garray = mat->garray,diag; 387 388 PetscFunctionBegin; 389 /* code only works for square matrices A */ 390 391 /* find size of row to the left of the diagonal part */ 392 ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr); 393 row = row - diag; 394 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 395 if (garray[b->j[b->i[row]+l]] > diag) break; 396 } 397 ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr); 398 399 /* diagonal part */ 400 ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr); 401 402 /* right of diagonal part */ 403 ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr); 404 #if defined(PETSC_HAVE_DEVICE) 405 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU; 406 #endif 407 PetscFunctionReturn(0); 408 } 409 410 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 411 { 412 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 413 PetscScalar value = 0.0; 414 PetscErrorCode ierr; 415 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 416 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 417 PetscBool roworiented = aij->roworiented; 418 419 /* Some Variables required in the macro */ 420 Mat A = aij->A; 421 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 422 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 423 PetscBool ignorezeroentries = a->ignorezeroentries; 424 Mat B = aij->B; 425 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 426 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 427 MatScalar *aa,*ba; 428 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 429 * cannot use "#if defined" inside a macro. 
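   It is set by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros above and is read
   only to decide whether the offload mask must be downgraded to PETSC_OFFLOAD_CPU in device (PETSC_HAVE_DEVICE) builds.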
*/ 430 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 431 432 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 433 PetscInt nonew; 434 MatScalar *ap1,*ap2; 435 436 PetscFunctionBegin; 437 #if defined(PETSC_HAVE_DEVICE) 438 if (A->offloadmask == PETSC_OFFLOAD_GPU) { 439 const PetscScalar *dummy; 440 ierr = MatSeqAIJGetArrayRead(A,&dummy);CHKERRQ(ierr); 441 ierr = MatSeqAIJRestoreArrayRead(A,&dummy);CHKERRQ(ierr); 442 } 443 if (B->offloadmask == PETSC_OFFLOAD_GPU) { 444 const PetscScalar *dummy; 445 ierr = MatSeqAIJGetArrayRead(B,&dummy);CHKERRQ(ierr); 446 ierr = MatSeqAIJRestoreArrayRead(B,&dummy);CHKERRQ(ierr); 447 } 448 #endif 449 aa = a->a; 450 ba = b->a; 451 for (i=0; i<m; i++) { 452 if (im[i] < 0) continue; 453 if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 454 if (im[i] >= rstart && im[i] < rend) { 455 row = im[i] - rstart; 456 lastcol1 = -1; 457 rp1 = aj + ai[row]; 458 ap1 = aa + ai[row]; 459 rmax1 = aimax[row]; 460 nrow1 = ailen[row]; 461 low1 = 0; 462 high1 = nrow1; 463 lastcol2 = -1; 464 rp2 = bj + bi[row]; 465 ap2 = ba + bi[row]; 466 rmax2 = bimax[row]; 467 nrow2 = bilen[row]; 468 low2 = 0; 469 high2 = nrow2; 470 471 for (j=0; j<n; j++) { 472 if (v) value = roworiented ? v[i*n+j] : v[i+j*m]; 473 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 474 if (in[j] >= cstart && in[j] < cend) { 475 col = in[j] - cstart; 476 nonew = a->nonew; 477 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 478 #if defined(PETSC_HAVE_DEVICE) 479 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 480 #endif 481 } else if (in[j] < 0) continue; 482 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 483 else { 484 if (mat->was_assembled) { 485 if (!aij->colmap) { 486 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 487 } 488 #if defined(PETSC_USE_CTABLE) 489 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 490 col--; 491 #else 492 col = aij->colmap[in[j]] - 1; 493 #endif 494 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 495 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 496 col = in[j]; 497 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 498 B = aij->B; 499 b = (Mat_SeqAIJ*)B->data; 500 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 501 rp2 = bj + bi[row]; 502 ap2 = ba + bi[row]; 503 rmax2 = bimax[row]; 504 nrow2 = bilen[row]; 505 low2 = 0; 506 high2 = nrow2; 507 bm = aij->B->rmap->n; 508 ba = b->a; 509 inserted = PETSC_FALSE; 510 } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 511 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 512 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 513 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 514 } 515 } else col = in[j]; 516 nonew = b->nonew; 517 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 518 #if defined(PETSC_HAVE_DEVICE) 519 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 520 #endif 521 } 522 } 523 } else { 524 if (mat->nooffprocentries) 
SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij    = (Mat_MPIAIJ*)mat->data;
  Mat        A       = aij->A; /* diagonal part of the matrix */
  Mat        B       = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a      = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b      = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart  = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen  = a->ilen,*aj = a->j;
  PetscInt   *bilen  = b->ilen,*bj = b->j;
  PetscInt   am      = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz;
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be correct and the more complex MatSetValues_MPIAIJ() has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij    = (Mat_MPIAIJ*)mat->data;
  Mat        A       = aij->A; /* diagonal part of the matrix */
  Mat        B       = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *aijd   = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ *a      = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b      = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart  = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt   *ailen  = a->ilen,*aj = a->j;
  PetscInt   *bilen  = b->ilen,*bj = b->j;
  PetscInt   am      = aij->A->rmap->n,j;
  PetscInt   *full_diag_i = aijd->i,*full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point.
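     They are the CSR row-offset ("i") arrays of the diagonal (A) and off-diagonal (B) blocks and serve below as
     the per-row write positions (rowstart_diag and rowstart_offd) when copying the j and a entries into place.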
*/ 598 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 599 PetscScalar *aa = a->a,*ba = b->a; 600 601 PetscFunctionBegin; 602 /* Iterate over all rows of the matrix */ 603 for (j=0; j<am; j++) { 604 dnz_row = onz_row = 0; 605 rowstart_offd = full_offd_i[j]; 606 rowstart_diag = full_diag_i[j]; 607 /* Iterate over all non-zero columns of the current row */ 608 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 609 /* If column is in the diagonal */ 610 if (mat_j[col] >= cstart && mat_j[col] < cend) { 611 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 612 aa[rowstart_diag+dnz_row] = mat_a[col]; 613 dnz_row++; 614 } else { /* off-diagonal entries */ 615 bj[rowstart_offd+onz_row] = mat_j[col]; 616 ba[rowstart_offd+onz_row] = mat_a[col]; 617 onz_row++; 618 } 619 } 620 ailen[j] = dnz_row; 621 bilen[j] = onz_row; 622 } 623 PetscFunctionReturn(0); 624 } 625 626 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 627 { 628 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 629 PetscErrorCode ierr; 630 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 631 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 632 633 PetscFunctionBegin; 634 for (i=0; i<m; i++) { 635 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 636 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 637 if (idxm[i] >= rstart && idxm[i] < rend) { 638 row = idxm[i] - rstart; 639 for (j=0; j<n; j++) { 640 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 641 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 642 if (idxn[j] >= cstart && idxn[j] < cend) { 643 col = idxn[j] - cstart; 644 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 645 } else { 646 if (!aij->colmap) { 647 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 648 } 649 #if defined(PETSC_USE_CTABLE) 650 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 651 col--; 652 #else 653 col = aij->colmap[idxn[j]] - 1; 654 #endif 655 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 656 else { 657 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 658 } 659 } 660 } 661 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 662 } 663 PetscFunctionReturn(0); 664 } 665 666 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 667 { 668 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 669 PetscErrorCode ierr; 670 PetscInt nstash,reallocs; 671 672 PetscFunctionBegin; 673 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 674 675 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 676 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 677 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 678 PetscFunctionReturn(0); 679 } 680 681 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 682 { 683 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 684 PetscErrorCode ierr; 685 PetscMPIInt n; 686 PetscInt i,j,rstart,ncols,flg; 687 PetscInt *row,*col; 688 PetscBool other_disassembled; 689 PetscScalar *val; 690 691 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be 
reset in disassembly */ 692 693 PetscFunctionBegin; 694 if (!aij->donotstash && !mat->nooffprocentries) { 695 while (1) { 696 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 697 if (!flg) break; 698 699 for (i=0; i<n;) { 700 /* Now identify the consecutive vals belonging to the same row */ 701 for (j=i,rstart=row[j]; j<n; j++) { 702 if (row[j] != rstart) break; 703 } 704 if (j < n) ncols = j-i; 705 else ncols = n-i; 706 /* Now assemble all these values with a single function call */ 707 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 708 i = j; 709 } 710 } 711 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 712 } 713 #if defined(PETSC_HAVE_DEVICE) 714 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 715 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 716 if (mat->boundtocpu) { 717 ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr); 718 ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr); 719 } 720 #endif 721 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 722 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 723 724 /* determine if any processor has disassembled, if so we must 725 also disassemble ourself, in order that we may reassemble. */ 726 /* 727 if nonzero structure of submatrix B cannot change then we know that 728 no processor disassembled thus we can skip this stuff 729 */ 730 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 731 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 732 if (mat->was_assembled && !other_disassembled) { 733 #if defined(PETSC_HAVE_DEVICE) 734 aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */ 735 #endif 736 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 737 } 738 } 739 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 740 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 741 } 742 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 743 #if defined(PETSC_HAVE_DEVICE) 744 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 745 #endif 746 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 747 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 748 749 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 750 751 aij->rowvalues = NULL; 752 753 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 754 755 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 756 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 757 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 758 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 759 } 760 #if defined(PETSC_HAVE_DEVICE) 761 mat->offloadmask = PETSC_OFFLOAD_BOTH; 762 #endif 763 PetscFunctionReturn(0); 764 } 765 766 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 767 { 768 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 769 PetscErrorCode ierr; 770 771 PetscFunctionBegin; 772 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 773 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 774 PetscFunctionReturn(0); 775 } 776 777 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const 
PetscInt rows[],PetscScalar diag,Vec x,Vec b) 778 { 779 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 780 PetscObjectState sA, sB; 781 PetscInt *lrows; 782 PetscInt r, len; 783 PetscBool cong, lch, gch; 784 PetscErrorCode ierr; 785 786 PetscFunctionBegin; 787 /* get locally owned rows */ 788 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 789 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 790 /* fix right hand side if needed */ 791 if (x && b) { 792 const PetscScalar *xx; 793 PetscScalar *bb; 794 795 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 796 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 797 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 798 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 799 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 800 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 801 } 802 803 sA = mat->A->nonzerostate; 804 sB = mat->B->nonzerostate; 805 806 if (diag != 0.0 && cong) { 807 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 808 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 809 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 810 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 811 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 812 PetscInt nnwA, nnwB; 813 PetscBool nnzA, nnzB; 814 815 nnwA = aijA->nonew; 816 nnwB = aijB->nonew; 817 nnzA = aijA->keepnonzeropattern; 818 nnzB = aijB->keepnonzeropattern; 819 if (!nnzA) { 820 ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr); 821 aijA->nonew = 0; 822 } 823 if (!nnzB) { 824 ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr); 825 aijB->nonew = 0; 826 } 827 /* Must zero here before the next loop */ 828 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 829 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 830 for (r = 0; r < len; ++r) { 831 const PetscInt row = lrows[r] + A->rmap->rstart; 832 if (row >= A->cmap->N) continue; 833 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 834 } 835 aijA->nonew = nnwA; 836 aijB->nonew = nnwB; 837 } else { 838 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 839 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 840 } 841 ierr = PetscFree(lrows);CHKERRQ(ierr); 842 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 843 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 844 845 /* reduce nonzerostate */ 846 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 847 ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 848 if (gch) A->nonzerostate++; 849 PetscFunctionReturn(0); 850 } 851 852 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 853 { 854 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 855 PetscErrorCode ierr; 856 PetscMPIInt n = A->rmap->n; 857 PetscInt i,j,r,m,len = 0; 858 PetscInt *lrows,*owners = A->rmap->range; 859 PetscMPIInt p = 0; 860 PetscSFNode *rrows; 861 PetscSF sf; 862 const PetscScalar *xx; 863 PetscScalar *bb,*mask; 864 Vec xmask,lmask; 865 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 866 const PetscInt 
*aj, *ii,*ridx; 867 PetscScalar *aa; 868 869 PetscFunctionBegin; 870 /* Create SF where leaves are input rows and roots are owned rows */ 871 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 872 for (r = 0; r < n; ++r) lrows[r] = -1; 873 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 874 for (r = 0; r < N; ++r) { 875 const PetscInt idx = rows[r]; 876 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 877 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 878 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 879 } 880 rrows[r].rank = p; 881 rrows[r].index = rows[r] - owners[p]; 882 } 883 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 884 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 885 /* Collect flags for rows to be zeroed */ 886 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 887 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 888 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 889 /* Compress and put in row numbers */ 890 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 891 /* zero diagonal part of matrix */ 892 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 893 /* handle off diagonal part of matrix */ 894 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 895 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 896 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 897 for (i=0; i<len; i++) bb[lrows[i]] = 1; 898 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 899 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 900 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 901 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 902 if (x && b) { /* this code is buggy when the row and column layout don't match */ 903 PetscBool cong; 904 905 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 906 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 907 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 908 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 909 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 910 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 911 } 912 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 913 /* remove zeroed rows of off diagonal matrix */ 914 ii = aij->i; 915 for (i=0; i<len; i++) { 916 ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr); 917 } 918 /* loop over all elements of off process part of matrix zeroing removed columns*/ 919 if (aij->compressedrow.use) { 920 m = aij->compressedrow.nrows; 921 ii = aij->compressedrow.i; 922 ridx = aij->compressedrow.rindex; 923 for (i=0; i<m; i++) { 924 n = ii[i+1] - ii[i]; 925 aj = aij->j + ii[i]; 926 aa = aij->a + ii[i]; 927 928 for (j=0; j<n; j++) { 929 if (PetscAbsScalar(mask[*aj])) { 930 if (b) bb[*ridx] -= *aa*xx[*aj]; 931 *aa = 0.0; 932 } 933 aa++; 934 aj++; 935 } 936 ridx++; 937 } 938 } else { /* do not use compressed row format */ 939 m = l->B->rmap->n; 940 for (i=0; i<m; i++) { 941 n = ii[i+1] - ii[i]; 942 aj = aij->j + ii[i]; 943 aa = aij->a + ii[i]; 944 for (j=0; j<n; j++) { 945 if (PetscAbsScalar(mask[*aj])) { 946 if (b) bb[i] -= *aa*xx[*aj]; 947 *aa = 0.0; 948 } 949 aa++; 950 aj++; 951 } 952 
} 953 } 954 if (x && b) { 955 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 956 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 957 } 958 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 959 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 960 ierr = PetscFree(lrows);CHKERRQ(ierr); 961 962 /* only change matrix nonzero state if pattern was allowed to be changed */ 963 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 964 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 965 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 966 } 967 PetscFunctionReturn(0); 968 } 969 970 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 971 { 972 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 973 PetscErrorCode ierr; 974 PetscInt nt; 975 VecScatter Mvctx = a->Mvctx; 976 977 PetscFunctionBegin; 978 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 979 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 980 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 981 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 982 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 983 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 984 PetscFunctionReturn(0); 985 } 986 987 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 988 { 989 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 990 PetscErrorCode ierr; 991 992 PetscFunctionBegin; 993 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 994 PetscFunctionReturn(0); 995 } 996 997 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 998 { 999 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1000 PetscErrorCode ierr; 1001 VecScatter Mvctx = a->Mvctx; 1002 1003 PetscFunctionBegin; 1004 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1005 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1006 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1007 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1008 PetscFunctionReturn(0); 1009 } 1010 1011 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1012 { 1013 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1014 PetscErrorCode ierr; 1015 1016 PetscFunctionBegin; 1017 /* do nondiagonal part */ 1018 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1019 /* do local part */ 1020 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1021 /* add partial results together */ 1022 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1023 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1024 PetscFunctionReturn(0); 1025 } 1026 1027 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1028 { 1029 MPI_Comm comm; 1030 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1031 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1032 IS Me,Notme; 1033 PetscErrorCode ierr; 1034 PetscInt M,N,first,last,*notme,i; 1035 PetscBool lf; 1036 PetscMPIInt size; 1037 1038 PetscFunctionBegin; 1039 /* Easy test: symmetric diagonal block */ 1040 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1041 ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 1042 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRMPI(ierr); 1043 if (!*f) PetscFunctionReturn(0); 1044 ierr = 
PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1045 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 1046 if (size == 1) PetscFunctionReturn(0); 1047 1048 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1049 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1050 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1051 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1052 for (i=0; i<first; i++) notme[i] = i; 1053 for (i=last; i<M; i++) notme[i-last+first] = i; 1054 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1055 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1056 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1057 Aoff = Aoffs[0]; 1058 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1059 Boff = Boffs[0]; 1060 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1061 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1062 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1063 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1064 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1065 ierr = PetscFree(notme);CHKERRQ(ierr); 1066 PetscFunctionReturn(0); 1067 } 1068 1069 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1070 { 1071 PetscErrorCode ierr; 1072 1073 PetscFunctionBegin; 1074 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1075 PetscFunctionReturn(0); 1076 } 1077 1078 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1079 { 1080 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1081 PetscErrorCode ierr; 1082 1083 PetscFunctionBegin; 1084 /* do nondiagonal part */ 1085 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1086 /* do local part */ 1087 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1088 /* add partial results together */ 1089 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1090 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1091 PetscFunctionReturn(0); 1092 } 1093 1094 /* 1095 This only works correctly for square matrices where the subblock A->A is the 1096 diagonal block 1097 */ 1098 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1099 { 1100 PetscErrorCode ierr; 1101 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1102 1103 PetscFunctionBegin; 1104 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1105 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1106 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1107 PetscFunctionReturn(0); 1108 } 1109 1110 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1111 { 1112 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1113 PetscErrorCode ierr; 1114 1115 PetscFunctionBegin; 1116 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1117 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1118 PetscFunctionReturn(0); 1119 } 1120 1121 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1122 { 1123 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1124 PetscErrorCode ierr; 1125 1126 PetscFunctionBegin; 1127 #if defined(PETSC_USE_LOG) 1128 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1129 #endif 1130 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1131 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1132 ierr = 
MatDestroy(&aij->A);CHKERRQ(ierr); 1133 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1134 #if defined(PETSC_USE_CTABLE) 1135 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1136 #else 1137 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1138 #endif 1139 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1140 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1141 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1142 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1143 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1144 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1145 1146 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 1147 ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr); 1148 1149 ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr); 1150 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1151 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1152 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1153 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1154 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1155 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1156 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1157 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr); 1158 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1159 #if defined(PETSC_HAVE_CUDA) 1160 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr); 1161 #endif 1162 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 1163 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr); 1164 #endif 1165 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 1166 #if defined(PETSC_HAVE_ELEMENTAL) 1167 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1168 #endif 1169 #if defined(PETSC_HAVE_SCALAPACK) 1170 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr); 1171 #endif 1172 #if defined(PETSC_HAVE_HYPRE) 1173 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1174 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1175 #endif 1176 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1177 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr); 1178 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1179 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr); 1180 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr); 1181 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr); 1182 #if defined(PETSC_HAVE_MKL_SPARSE) 1183 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr); 1184 #endif 1185 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr); 1186 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1187 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr); 1188 PetscFunctionReturn(0); 1189 } 1190 1191 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1192 { 1193 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1194 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1195 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1196 const PetscInt *garray = aij->garray; 1197 const PetscScalar *aa,*ba; 1198 PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb; 1199 PetscInt *rowlens; 1200 PetscInt *colidxs; 1201 PetscScalar *matvals; 1202 PetscErrorCode ierr; 1203 1204 PetscFunctionBegin; 1205 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1206 1207 M = mat->rmap->N; 1208 N = mat->cmap->N; 1209 m = mat->rmap->n; 1210 rs = mat->rmap->rstart; 1211 cs = mat->cmap->rstart; 1212 nz = A->nz + B->nz; 1213 1214 /* write matrix header */ 1215 header[0] = MAT_FILE_CLASSID; 1216 header[1] = M; header[2] = N; header[3] = nz; 1217 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1218 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1219 1220 /* fill in and store row lengths */ 1221 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1222 for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1223 ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr); 1224 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1225 1226 /* fill in and store column indices */ 1227 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1228 for (cnt=0, i=0; i<m; i++) { 1229 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1230 if (garray[B->j[jb]] > cs) break; 1231 colidxs[cnt++] = garray[B->j[jb]]; 1232 } 1233 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1234 colidxs[cnt++] = A->j[ja] + cs; 1235 for (; jb<B->i[i+1]; jb++) 1236 colidxs[cnt++] = garray[B->j[jb]]; 1237 } 1238 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1239 ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 1240 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1241 1242 /* fill in and store nonzero values */ 1243 ierr = MatSeqAIJGetArrayRead(aij->A,&aa);CHKERRQ(ierr); 1244 ierr = MatSeqAIJGetArrayRead(aij->B,&ba);CHKERRQ(ierr); 1245 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1246 for (cnt=0, i=0; i<m; i++) { 1247 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1248 if (garray[B->j[jb]] > cs) break; 1249 matvals[cnt++] = ba[jb]; 1250 } 1251 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1252 matvals[cnt++] = aa[ja]; 1253 for (; jb<B->i[i+1]; jb++) 1254 matvals[cnt++] = ba[jb]; 1255 } 1256 ierr = MatSeqAIJRestoreArrayRead(aij->A,&aa);CHKERRQ(ierr); 1257 ierr = MatSeqAIJRestoreArrayRead(aij->B,&ba);CHKERRQ(ierr); 1258 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1259 ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 1260 ierr = PetscFree(matvals);CHKERRQ(ierr); 1261 1262 /* write block size option to the viewer's .info file */ 1263 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1264 PetscFunctionReturn(0); 
1265 } 1266 1267 #include <petscdraw.h> 1268 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1269 { 1270 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1271 PetscErrorCode ierr; 1272 PetscMPIInt rank = aij->rank,size = aij->size; 1273 PetscBool isdraw,iascii,isbinary; 1274 PetscViewer sviewer; 1275 PetscViewerFormat format; 1276 1277 PetscFunctionBegin; 1278 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1279 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1280 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1281 if (iascii) { 1282 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1283 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1284 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1285 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1286 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1287 for (i=0; i<(PetscInt)size; i++) { 1288 nmax = PetscMax(nmax,nz[i]); 1289 nmin = PetscMin(nmin,nz[i]); 1290 navg += nz[i]; 1291 } 1292 ierr = PetscFree(nz);CHKERRQ(ierr); 1293 navg = navg/size; 1294 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1295 PetscFunctionReturn(0); 1296 } 1297 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1298 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1299 MatInfo info; 1300 PetscInt *inodes=NULL; 1301 1302 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr); 1303 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1304 ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr); 1305 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1306 if (!inodes) { 1307 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1308 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1309 } else { 1310 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1311 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1312 } 1313 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1314 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1315 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1316 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1317 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1318 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1319 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1320 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1321 PetscFunctionReturn(0); 1322 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1323 PetscInt inodecount,inodelimit,*inodes; 1324 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1325 if (inodes) { 1326 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1327 } else { 1328 ierr = 
PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1329 } 1330 PetscFunctionReturn(0); 1331 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1332 PetscFunctionReturn(0); 1333 } 1334 } else if (isbinary) { 1335 if (size == 1) { 1336 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1337 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1338 } else { 1339 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1340 } 1341 PetscFunctionReturn(0); 1342 } else if (iascii && size == 1) { 1343 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1344 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1345 PetscFunctionReturn(0); 1346 } else if (isdraw) { 1347 PetscDraw draw; 1348 PetscBool isnull; 1349 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1350 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1351 if (isnull) PetscFunctionReturn(0); 1352 } 1353 1354 { /* assemble the entire matrix onto first processor */ 1355 Mat A = NULL, Av; 1356 IS isrow,iscol; 1357 1358 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1359 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1360 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1361 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1362 /* The commented code uses MatCreateSubMatrices instead */ 1363 /* 1364 Mat *AA, A = NULL, Av; 1365 IS isrow,iscol; 1366 1367 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1368 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? 
mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1369 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1370 if (rank == 0) { 1371 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1372 A = AA[0]; 1373 Av = AA[0]; 1374 } 1375 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1376 */ 1377 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1378 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1379 /* 1380 Everyone has to call to draw the matrix since the graphics waits are 1381 synchronized across all processors that share the PetscDraw object 1382 */ 1383 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1384 if (rank == 0) { 1385 if (((PetscObject)mat)->name) { 1386 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1387 } 1388 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1389 } 1390 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1391 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1392 ierr = MatDestroy(&A);CHKERRQ(ierr); 1393 } 1394 PetscFunctionReturn(0); 1395 } 1396 1397 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1398 { 1399 PetscErrorCode ierr; 1400 PetscBool iascii,isdraw,issocket,isbinary; 1401 1402 PetscFunctionBegin; 1403 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1404 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1405 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1406 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1407 if (iascii || isdraw || isbinary || issocket) { 1408 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1409 } 1410 PetscFunctionReturn(0); 1411 } 1412 1413 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1414 { 1415 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1416 PetscErrorCode ierr; 1417 Vec bb1 = NULL; 1418 PetscBool hasop; 1419 1420 PetscFunctionBegin; 1421 if (flag == SOR_APPLY_UPPER) { 1422 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1423 PetscFunctionReturn(0); 1424 } 1425 1426 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1427 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1428 } 1429 1430 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1431 if (flag & SOR_ZERO_INITIAL_GUESS) { 1432 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1433 its--; 1434 } 1435 1436 while (its--) { 1437 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1438 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1439 1440 /* update rhs: bb1 = bb - B*x */ 1441 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1442 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1443 1444 /* local sweep */ 1445 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1446 } 1447 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1448 if (flag & SOR_ZERO_INITIAL_GUESS) { 1449 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1450 its--; 1451 } 1452 while (its--) { 1453 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1454 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1455 1456 /* update rhs: bb1 = bb - B*x */ 1457 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1458 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1459 1460 /* local sweep */ 1461 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1462 } 1463 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1464 if (flag & SOR_ZERO_INITIAL_GUESS) { 1465 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1466 its--; 1467 } 1468 while (its--) { 1469 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1470 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1471 1472 /* update rhs: bb1 = bb - B*x */ 1473 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1474 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1475 1476 /* local sweep */ 1477 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1478 } 1479 } else if (flag & SOR_EISENSTAT) { 1480 Vec xx1; 1481 1482 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1483 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1484 1485 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1486 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1487 if (!mat->diag) { 1488 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1489 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1490 } 1491 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1492 if (hasop) { 1493 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1494 } else { 1495 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1496 } 1497 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1498 1499 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1500 1501 /* local sweep */ 1502 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1503 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1504 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1505 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1506 1507 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1508 1509 matin->factorerrortype = mat->A->factorerrortype; 1510 PetscFunctionReturn(0); 1511 } 1512 1513 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1514 { 1515 Mat aA,aB,Aperm; 1516 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1517 PetscScalar *aa,*ba; 1518 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1519 PetscSF rowsf,sf; 1520 IS parcolp = NULL; 1521 PetscBool done; 1522 PetscErrorCode ierr; 1523 1524 PetscFunctionBegin; 1525 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1526 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1527 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1528 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1529 1530 /* Invert row permutation to find out where my rows should go */ 1531 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1532 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1533 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 
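/* Added note: the reduce below inverts the row permutation. Leaf i of rowsf points at root rwant[i], the old global row that should land at new global row rstart+i, so reducing work[i] = rstart+i with MPI_REPLACE deposits the new position at the current owner of that old row; afterwards rdest[k] gives the new global row of local row k. Illustrative (hypothetical) example with two ranks of two rows each and rowp = {1,3,0,2}: rank 0 obtains rdest = {2,0}, i.e. its old rows 0 and 1 move to new rows 2 and 0. */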
1534 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1535 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1536 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1537 1538 /* Invert column permutation to find out where my columns should go */ 1539 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1540 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1541 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1542 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1543 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1544 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1545 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1546 1547 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1548 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1549 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1550 1551 /* Find out where my gcols should go */ 1552 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1553 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1554 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1555 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1556 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1557 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr); 1558 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr); 1559 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1560 1561 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1562 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1563 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1564 for (i=0; i<m; i++) { 1565 PetscInt row = rdest[i]; 1566 PetscMPIInt rowner; 1567 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1568 for (j=ai[i]; j<ai[i+1]; j++) { 1569 PetscInt col = cdest[aj[j]]; 1570 PetscMPIInt cowner; 1571 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1572 if (rowner == cowner) dnnz[i]++; 1573 else onnz[i]++; 1574 } 1575 for (j=bi[i]; j<bi[i+1]; j++) { 1576 PetscInt col = gcdest[bj[j]]; 1577 PetscMPIInt cowner; 1578 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1579 if (rowner == cowner) dnnz[i]++; 1580 else onnz[i]++; 1581 } 1582 } 1583 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr); 1584 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr); 1585 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr); 1586 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr); 1587 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1588 1589 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1590 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1591 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1592 for (i=0; i<m; i++) { 1593 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1594 PetscInt j0,rowlen; 1595 rowlen = ai[i+1] - ai[i]; 1596 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1597 for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1598 ierr = 
MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1599 } 1600 rowlen = bi[i+1] - bi[i]; 1601 for (j0=j=0; j<rowlen; j0=j) { 1602 for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1603 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1604 } 1605 } 1606 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1607 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1608 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1609 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1610 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1611 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1612 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1613 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1614 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1615 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1616 *B = Aperm; 1617 PetscFunctionReturn(0); 1618 } 1619 1620 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1621 { 1622 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1623 PetscErrorCode ierr; 1624 1625 PetscFunctionBegin; 1626 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1627 if (ghosts) *ghosts = aij->garray; 1628 PetscFunctionReturn(0); 1629 } 1630 1631 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1632 { 1633 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1634 Mat A = mat->A,B = mat->B; 1635 PetscErrorCode ierr; 1636 PetscLogDouble isend[5],irecv[5]; 1637 1638 PetscFunctionBegin; 1639 info->block_size = 1.0; 1640 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1641 1642 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1643 isend[3] = info->memory; isend[4] = info->mallocs; 1644 1645 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1646 1647 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1648 isend[3] += info->memory; isend[4] += info->mallocs; 1649 if (flag == MAT_LOCAL) { 1650 info->nz_used = isend[0]; 1651 info->nz_allocated = isend[1]; 1652 info->nz_unneeded = isend[2]; 1653 info->memory = isend[3]; 1654 info->mallocs = isend[4]; 1655 } else if (flag == MAT_GLOBAL_MAX) { 1656 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr); 1657 1658 info->nz_used = irecv[0]; 1659 info->nz_allocated = irecv[1]; 1660 info->nz_unneeded = irecv[2]; 1661 info->memory = irecv[3]; 1662 info->mallocs = irecv[4]; 1663 } else if (flag == MAT_GLOBAL_SUM) { 1664 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr); 1665 1666 info->nz_used = irecv[0]; 1667 info->nz_allocated = irecv[1]; 1668 info->nz_unneeded = irecv[2]; 1669 info->memory = irecv[3]; 1670 info->mallocs = irecv[4]; 1671 } 1672 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1673 info->fill_ratio_needed = 0; 1674 info->factor_mallocs = 0; 1675 PetscFunctionReturn(0); 1676 } 1677 1678 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1679 { 1680 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1681 PetscErrorCode ierr; 1682 1683 PetscFunctionBegin; 1684 switch (op) { 1685 case MAT_NEW_NONZERO_LOCATIONS: 1686 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1687 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1688 case MAT_KEEP_NONZERO_PATTERN: 1689 case 
MAT_NEW_NONZERO_LOCATION_ERR: 1690 case MAT_USE_INODES: 1691 case MAT_IGNORE_ZERO_ENTRIES: 1692 case MAT_FORM_EXPLICIT_TRANSPOSE: 1693 MatCheckPreallocated(A,1); 1694 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1695 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1696 break; 1697 case MAT_ROW_ORIENTED: 1698 MatCheckPreallocated(A,1); 1699 a->roworiented = flg; 1700 1701 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1702 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1703 break; 1704 case MAT_FORCE_DIAGONAL_ENTRIES: 1705 case MAT_SORTED_FULL: 1706 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1707 break; 1708 case MAT_IGNORE_OFF_PROC_ENTRIES: 1709 a->donotstash = flg; 1710 break; 1711 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1712 case MAT_SPD: 1713 case MAT_SYMMETRIC: 1714 case MAT_STRUCTURALLY_SYMMETRIC: 1715 case MAT_HERMITIAN: 1716 case MAT_SYMMETRY_ETERNAL: 1717 break; 1718 case MAT_SUBMAT_SINGLEIS: 1719 A->submat_singleis = flg; 1720 break; 1721 case MAT_STRUCTURE_ONLY: 1722 /* The option is handled directly by MatSetOption() */ 1723 break; 1724 default: 1725 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1726 } 1727 PetscFunctionReturn(0); 1728 } 1729 1730 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1731 { 1732 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1733 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1734 PetscErrorCode ierr; 1735 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1736 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1737 PetscInt *cmap,*idx_p; 1738 1739 PetscFunctionBegin; 1740 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1741 mat->getrowactive = PETSC_TRUE; 1742 1743 if (!mat->rowvalues && (idx || v)) { 1744 /* 1745 allocate enough space to hold information from the longest row. 
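       Added note: the scan below takes the maximum combined length of the diagonal (A) and off-diagonal (B) parts over all local rows; the rowvalues/rowindices buffers are allocated once, reused by subsequent MatGetRow() calls, and released when the matrix is destroyed.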
1746 */ 1747 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1748 PetscInt max = 1,tmp; 1749 for (i=0; i<matin->rmap->n; i++) { 1750 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1751 if (max < tmp) max = tmp; 1752 } 1753 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1754 } 1755 1756 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1757 lrow = row - rstart; 1758 1759 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1760 if (!v) {pvA = NULL; pvB = NULL;} 1761 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1762 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1763 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1764 nztot = nzA + nzB; 1765 1766 cmap = mat->garray; 1767 if (v || idx) { 1768 if (nztot) { 1769 /* Sort by increasing column numbers, assuming A and B already sorted */ 1770 PetscInt imark = -1; 1771 if (v) { 1772 *v = v_p = mat->rowvalues; 1773 for (i=0; i<nzB; i++) { 1774 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1775 else break; 1776 } 1777 imark = i; 1778 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1779 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1780 } 1781 if (idx) { 1782 *idx = idx_p = mat->rowindices; 1783 if (imark > -1) { 1784 for (i=0; i<imark; i++) { 1785 idx_p[i] = cmap[cworkB[i]]; 1786 } 1787 } else { 1788 for (i=0; i<nzB; i++) { 1789 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1790 else break; 1791 } 1792 imark = i; 1793 } 1794 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1795 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1796 } 1797 } else { 1798 if (idx) *idx = NULL; 1799 if (v) *v = NULL; 1800 } 1801 } 1802 *nz = nztot; 1803 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1804 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1805 PetscFunctionReturn(0); 1806 } 1807 1808 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1809 { 1810 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1811 1812 PetscFunctionBegin; 1813 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1814 aij->getrowactive = PETSC_FALSE; 1815 PetscFunctionReturn(0); 1816 } 1817 1818 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1819 { 1820 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1821 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1822 PetscErrorCode ierr; 1823 PetscInt i,j,cstart = mat->cmap->rstart; 1824 PetscReal sum = 0.0; 1825 MatScalar *v; 1826 1827 PetscFunctionBegin; 1828 if (aij->size == 1) { 1829 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1830 } else { 1831 if (type == NORM_FROBENIUS) { 1832 v = amat->a; 1833 for (i=0; i<amat->nz; i++) { 1834 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1835 } 1836 v = bmat->a; 1837 for (i=0; i<bmat->nz; i++) { 1838 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1839 } 1840 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1841 *norm = PetscSqrtReal(*norm); 1842 ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr); 1843 } else if (type == NORM_1) { /* max column norm */ 1844 PetscReal *tmp,*tmp2; 1845 PetscInt *jj,*garray = aij->garray; 1846 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1847 ierr = 
PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1848 *norm = 0.0; 1849 v = amat->a; jj = amat->j; 1850 for (j=0; j<amat->nz; j++) { 1851 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1852 } 1853 v = bmat->a; jj = bmat->j; 1854 for (j=0; j<bmat->nz; j++) { 1855 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1856 } 1857 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1858 for (j=0; j<mat->cmap->N; j++) { 1859 if (tmp2[j] > *norm) *norm = tmp2[j]; 1860 } 1861 ierr = PetscFree(tmp);CHKERRQ(ierr); 1862 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1863 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1864 } else if (type == NORM_INFINITY) { /* max row norm */ 1865 PetscReal ntemp = 0.0; 1866 for (j=0; j<aij->A->rmap->n; j++) { 1867 v = amat->a + amat->i[j]; 1868 sum = 0.0; 1869 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1870 sum += PetscAbsScalar(*v); v++; 1871 } 1872 v = bmat->a + bmat->i[j]; 1873 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1874 sum += PetscAbsScalar(*v); v++; 1875 } 1876 if (sum > ntemp) ntemp = sum; 1877 } 1878 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1879 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1880 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1881 } 1882 PetscFunctionReturn(0); 1883 } 1884 1885 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1886 { 1887 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 1888 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 1889 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 1890 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 1891 PetscErrorCode ierr; 1892 Mat B,A_diag,*B_diag; 1893 const MatScalar *pbv,*bv; 1894 1895 PetscFunctionBegin; 1896 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1897 ai = Aloc->i; aj = Aloc->j; 1898 bi = Bloc->i; bj = Bloc->j; 1899 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1900 PetscInt *d_nnz,*g_nnz,*o_nnz; 1901 PetscSFNode *oloc; 1902 PETSC_UNUSED PetscSF sf; 1903 1904 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1905 /* compute d_nnz for preallocation */ 1906 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 1907 for (i=0; i<ai[ma]; i++) { 1908 d_nnz[aj[i]]++; 1909 } 1910 /* compute local off-diagonal contributions */ 1911 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 1912 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1913 /* map those to global */ 1914 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1915 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1916 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1917 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 1918 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1919 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1920 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1921 1922 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1923 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1924 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1925 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1926 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1927 ierr = 
PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1928 } else { 1929 B = *matout; 1930 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1931 } 1932 1933 b = (Mat_MPIAIJ*)B->data; 1934 A_diag = a->A; 1935 B_diag = &b->A; 1936 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 1937 A_diag_ncol = A_diag->cmap->N; 1938 B_diag_ilen = sub_B_diag->ilen; 1939 B_diag_i = sub_B_diag->i; 1940 1941 /* Set ilen for diagonal of B */ 1942 for (i=0; i<A_diag_ncol; i++) { 1943 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 1944 } 1945 1946 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 1947 very quickly (=without using MatSetValues), because all writes are local. */ 1948 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 1949 1950 /* copy over the B part */ 1951 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 1952 ierr = MatSeqAIJGetArrayRead(a->B,&bv);CHKERRQ(ierr); 1953 pbv = bv; 1954 row = A->rmap->rstart; 1955 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1956 cols_tmp = cols; 1957 for (i=0; i<mb; i++) { 1958 ncol = bi[i+1]-bi[i]; 1959 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES);CHKERRQ(ierr); 1960 row++; 1961 pbv += ncol; cols_tmp += ncol; 1962 } 1963 ierr = PetscFree(cols);CHKERRQ(ierr); 1964 ierr = MatSeqAIJRestoreArrayRead(a->B,&bv);CHKERRQ(ierr); 1965 1966 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1967 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1968 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1969 *matout = B; 1970 } else { 1971 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 1972 } 1973 PetscFunctionReturn(0); 1974 } 1975 1976 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1977 { 1978 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1979 Mat a = aij->A,b = aij->B; 1980 PetscErrorCode ierr; 1981 PetscInt s1,s2,s3; 1982 1983 PetscFunctionBegin; 1984 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1985 if (rr) { 1986 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1987 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1988 /* Overlap communication with computation. 
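     Added note: the forward scatter of rr into aij->lvec is started here and completed further below, after the diagonal block has been scaled; only then is the off-diagonal block right-scaled with the ghosted values in lvec.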
*/ 1989 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1990 } 1991 if (ll) { 1992 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1993 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1994 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1995 } 1996 /* scale the diagonal block */ 1997 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1998 1999 if (rr) { 2000 /* Do a scatter end and then right scale the off-diagonal block */ 2001 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2002 ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr); 2003 } 2004 PetscFunctionReturn(0); 2005 } 2006 2007 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2008 { 2009 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2010 PetscErrorCode ierr; 2011 2012 PetscFunctionBegin; 2013 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2014 PetscFunctionReturn(0); 2015 } 2016 2017 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2018 { 2019 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2020 Mat a,b,c,d; 2021 PetscBool flg; 2022 PetscErrorCode ierr; 2023 2024 PetscFunctionBegin; 2025 a = matA->A; b = matA->B; 2026 c = matB->A; d = matB->B; 2027 2028 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2029 if (flg) { 2030 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2031 } 2032 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 2033 PetscFunctionReturn(0); 2034 } 2035 2036 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2037 { 2038 PetscErrorCode ierr; 2039 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2040 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2041 2042 PetscFunctionBegin; 2043 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2044 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2045 /* because of the column compression in the off-processor part of the matrix a->B, 2046 the number of columns in a->B and b->B may be different, hence we cannot call 2047 the MatCopy() directly on the two parts. If need be, we can provide a more 2048 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2049 then copying the submatrices */ 2050 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2051 } else { 2052 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2053 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2054 } 2055 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2056 PetscFunctionReturn(0); 2057 } 2058 2059 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2060 { 2061 PetscErrorCode ierr; 2062 2063 PetscFunctionBegin; 2064 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr); 2065 PetscFunctionReturn(0); 2066 } 2067 2068 /* 2069 Computes the number of nonzeros per row needed for preallocation when X and Y 2070 have different nonzero structure. 
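   Illustrative example (made-up indices): if row i of X has global columns {0,3,7} and row i of Y has global columns {3,5}, the merged pattern is {0,3,5,7}, so nnz[i] = 4; columns present in both matrices are counted once.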
2071 */ 2072 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2073 { 2074 PetscInt i,j,k,nzx,nzy; 2075 2076 PetscFunctionBegin; 2077 /* Set the number of nonzeros in the new matrix */ 2078 for (i=0; i<m; i++) { 2079 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2080 nzx = xi[i+1] - xi[i]; 2081 nzy = yi[i+1] - yi[i]; 2082 nnz[i] = 0; 2083 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2084 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2085 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2086 nnz[i]++; 2087 } 2088 for (; k<nzy; k++) nnz[i]++; 2089 } 2090 PetscFunctionReturn(0); 2091 } 2092 2093 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2094 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2095 { 2096 PetscErrorCode ierr; 2097 PetscInt m = Y->rmap->N; 2098 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2099 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2100 2101 PetscFunctionBegin; 2102 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2103 PetscFunctionReturn(0); 2104 } 2105 2106 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2107 { 2108 PetscErrorCode ierr; 2109 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2110 2111 PetscFunctionBegin; 2112 if (str == SAME_NONZERO_PATTERN) { 2113 ierr = MatAXPY(yy->A,a,xx->A,str);CHKERRQ(ierr); 2114 ierr = MatAXPY(yy->B,a,xx->B,str);CHKERRQ(ierr); 2115 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2116 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2117 } else { 2118 Mat B; 2119 PetscInt *nnz_d,*nnz_o; 2120 2121 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2122 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2123 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2124 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2125 ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr); 2126 ierr = MatSetType(B,((PetscObject)Y)->type_name);CHKERRQ(ierr); 2127 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2128 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2129 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2130 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2131 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2132 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2133 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2134 } 2135 PetscFunctionReturn(0); 2136 } 2137 2138 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2139 2140 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2141 { 2142 #if defined(PETSC_USE_COMPLEX) 2143 PetscErrorCode ierr; 2144 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2145 2146 PetscFunctionBegin; 2147 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2148 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2149 #else 2150 PetscFunctionBegin; 2151 #endif 2152 PetscFunctionReturn(0); 2153 } 2154 2155 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2156 { 2157 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2158 PetscErrorCode ierr; 2159 2160 PetscFunctionBegin; 2161 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2162 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2163 
PetscFunctionReturn(0); 2164 } 2165 2166 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2167 { 2168 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2169 PetscErrorCode ierr; 2170 2171 PetscFunctionBegin; 2172 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2173 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2174 PetscFunctionReturn(0); 2175 } 2176 2177 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2178 { 2179 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2180 PetscErrorCode ierr; 2181 PetscInt i,*idxb = NULL,m = A->rmap->n; 2182 PetscScalar *va,*vv; 2183 Vec vB,vA; 2184 const PetscScalar *vb; 2185 2186 PetscFunctionBegin; 2187 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr); 2188 ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr); 2189 2190 ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr); 2191 if (idx) { 2192 for (i=0; i<m; i++) { 2193 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2194 } 2195 } 2196 2197 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr); 2198 ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr); 2199 ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr); 2200 2201 ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr); 2202 ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr); 2203 for (i=0; i<m; i++) { 2204 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2205 vv[i] = vb[i]; 2206 if (idx) idx[i] = a->garray[idxb[i]]; 2207 } else { 2208 vv[i] = va[i]; 2209 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) 2210 idx[i] = a->garray[idxb[i]]; 2211 } 2212 } 2213 ierr = VecRestoreArrayWrite(vA,&vv);CHKERRQ(ierr); 2214 ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr); 2215 ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr); 2216 ierr = PetscFree(idxb);CHKERRQ(ierr); 2217 ierr = VecDestroy(&vA);CHKERRQ(ierr); 2218 ierr = VecDestroy(&vB);CHKERRQ(ierr); 2219 PetscFunctionReturn(0); 2220 } 2221 2222 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2223 { 2224 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2225 PetscInt m = A->rmap->n,n = A->cmap->n; 2226 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2227 PetscInt *cmap = mat->garray; 2228 PetscInt *diagIdx, *offdiagIdx; 2229 Vec diagV, offdiagV; 2230 PetscScalar *a, *diagA, *offdiagA; 2231 const PetscScalar *ba,*bav; 2232 PetscInt r,j,col,ncols,*bi,*bj; 2233 PetscErrorCode ierr; 2234 Mat B = mat->B; 2235 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2236 2237 PetscFunctionBegin; 2238 /* When a process holds entire A and other processes have no entry */ 2239 if (A->cmap->N == n) { 2240 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2241 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2242 ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr); 2243 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2244 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2245 PetscFunctionReturn(0); 2246 } else if (n == 0) { 2247 if (m) { 2248 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2249 for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;} 2250 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2251 } 2252 PetscFunctionReturn(0); 2253 } 2254 2255 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2256 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2257 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2258 ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2259 2260 /* Get offdiagIdx[] for implicit 0.0 */ 2261 ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr); 2262 ba = bav; 2263 bi = b->i; 2264 bj = b->j; 
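/* Added note: for each local row the loop below forms the candidate minimum over the off-diagonal (B) part. If the compressed row stores every off-diagonal column, the candidate starts from the first stored entry; otherwise at least one off-diagonal entry is an implicit 0.0, so the candidate magnitude is 0.0 and the search only needs the global column of the first such implicit zero (the first hole in cmap) to report in idx[]. The stored values are then compared against this candidate. */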
2265 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2266 for (r = 0; r < m; r++) { 2267 ncols = bi[r+1] - bi[r]; 2268 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2269 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2270 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2271 offdiagA[r] = 0.0; 2272 2273 /* Find first hole in the cmap */ 2274 for (j=0; j<ncols; j++) { 2275 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2276 if (col > j && j < cstart) { 2277 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2278 break; 2279 } else if (col > j + n && j >= cstart) { 2280 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2281 break; 2282 } 2283 } 2284 if (j == ncols && ncols < A->cmap->N - n) { 2285 /* a hole is outside compressed Bcols */ 2286 if (ncols == 0) { 2287 if (cstart) { 2288 offdiagIdx[r] = 0; 2289 } else offdiagIdx[r] = cend; 2290 } else { /* ncols > 0 */ 2291 offdiagIdx[r] = cmap[ncols-1] + 1; 2292 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2293 } 2294 } 2295 } 2296 2297 for (j=0; j<ncols; j++) { 2298 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2299 ba++; bj++; 2300 } 2301 } 2302 2303 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2304 ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2305 for (r = 0; r < m; ++r) { 2306 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2307 a[r] = diagA[r]; 2308 if (idx) idx[r] = cstart + diagIdx[r]; 2309 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2310 a[r] = diagA[r]; 2311 if (idx) { 2312 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2313 idx[r] = cstart + diagIdx[r]; 2314 } else idx[r] = offdiagIdx[r]; 2315 } 2316 } else { 2317 a[r] = offdiagA[r]; 2318 if (idx) idx[r] = offdiagIdx[r]; 2319 } 2320 } 2321 ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr); 2322 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2323 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2324 ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2325 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2326 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2327 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2328 PetscFunctionReturn(0); 2329 } 2330 2331 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2332 { 2333 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2334 PetscInt m = A->rmap->n,n = A->cmap->n; 2335 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2336 PetscInt *cmap = mat->garray; 2337 PetscInt *diagIdx, *offdiagIdx; 2338 Vec diagV, offdiagV; 2339 PetscScalar *a, *diagA, *offdiagA; 2340 const PetscScalar *ba,*bav; 2341 PetscInt r,j,col,ncols,*bi,*bj; 2342 PetscErrorCode ierr; 2343 Mat B = mat->B; 2344 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2345 2346 PetscFunctionBegin; 2347 /* When a process holds entire A and other processes have no entry */ 2348 if (A->cmap->N == n) { 2349 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2350 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2351 ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr); 2352 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2353 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2354 PetscFunctionReturn(0); 2355 } else if (n == 0) { 2356 if (m) { 2357 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2358 for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;} 2359 ierr = 
VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2360 } 2361 PetscFunctionReturn(0); 2362 } 2363 2364 ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2365 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2366 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2367 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2368 2369 /* Get offdiagIdx[] for implicit 0.0 */ 2370 ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr); 2371 ba = bav; 2372 bi = b->i; 2373 bj = b->j; 2374 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2375 for (r = 0; r < m; r++) { 2376 ncols = bi[r+1] - bi[r]; 2377 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2378 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2379 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2380 offdiagA[r] = 0.0; 2381 2382 /* Find first hole in the cmap */ 2383 for (j=0; j<ncols; j++) { 2384 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2385 if (col > j && j < cstart) { 2386 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2387 break; 2388 } else if (col > j + n && j >= cstart) { 2389 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2390 break; 2391 } 2392 } 2393 if (j == ncols && ncols < A->cmap->N - n) { 2394 /* a hole is outside compressed Bcols */ 2395 if (ncols == 0) { 2396 if (cstart) { 2397 offdiagIdx[r] = 0; 2398 } else offdiagIdx[r] = cend; 2399 } else { /* ncols > 0 */ 2400 offdiagIdx[r] = cmap[ncols-1] + 1; 2401 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2402 } 2403 } 2404 } 2405 2406 for (j=0; j<ncols; j++) { 2407 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2408 ba++; bj++; 2409 } 2410 } 2411 2412 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2413 ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2414 for (r = 0; r < m; ++r) { 2415 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2416 a[r] = diagA[r]; 2417 if (idx) idx[r] = cstart + diagIdx[r]; 2418 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2419 a[r] = diagA[r]; 2420 if (idx) { 2421 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2422 idx[r] = cstart + diagIdx[r]; 2423 } else idx[r] = offdiagIdx[r]; 2424 } 2425 } else { 2426 a[r] = offdiagA[r]; 2427 if (idx) idx[r] = offdiagIdx[r]; 2428 } 2429 } 2430 ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr); 2431 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2432 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2433 ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2434 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2435 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2436 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2437 PetscFunctionReturn(0); 2438 } 2439 2440 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2441 { 2442 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 2443 PetscInt m = A->rmap->n,n = A->cmap->n; 2444 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2445 PetscInt *cmap = mat->garray; 2446 PetscInt *diagIdx, *offdiagIdx; 2447 Vec diagV, offdiagV; 2448 PetscScalar *a, *diagA, *offdiagA; 2449 const PetscScalar *ba,*bav; 2450 PetscInt r,j,col,ncols,*bi,*bj; 2451 PetscErrorCode ierr; 2452 Mat B = mat->B; 2453 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2454 2455 PetscFunctionBegin; 2456 /* When a process holds entire A and other processes have no entry */ 2457 if (A->cmap->N == n) { 2458 ierr = 
VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2459 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2460 ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr); 2461 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2462 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2463 PetscFunctionReturn(0); 2464 } else if (n == 0) { 2465 if (m) { 2466 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2467 for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;} 2468 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2469 } 2470 PetscFunctionReturn(0); 2471 } 2472 2473 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2474 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2475 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2476 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2477 2478 /* Get offdiagIdx[] for implicit 0.0 */ 2479 ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr); 2480 ba = bav; 2481 bi = b->i; 2482 bj = b->j; 2483 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2484 for (r = 0; r < m; r++) { 2485 ncols = bi[r+1] - bi[r]; 2486 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2487 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2488 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2489 offdiagA[r] = 0.0; 2490 2491 /* Find first hole in the cmap */ 2492 for (j=0; j<ncols; j++) { 2493 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2494 if (col > j && j < cstart) { 2495 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2496 break; 2497 } else if (col > j + n && j >= cstart) { 2498 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2499 break; 2500 } 2501 } 2502 if (j == ncols && ncols < A->cmap->N - n) { 2503 /* a hole is outside compressed Bcols */ 2504 if (ncols == 0) { 2505 if (cstart) { 2506 offdiagIdx[r] = 0; 2507 } else offdiagIdx[r] = cend; 2508 } else { /* ncols > 0 */ 2509 offdiagIdx[r] = cmap[ncols-1] + 1; 2510 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2511 } 2512 } 2513 } 2514 2515 for (j=0; j<ncols; j++) { 2516 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2517 ba++; bj++; 2518 } 2519 } 2520 2521 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2522 ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr); 2523 for (r = 0; r < m; ++r) { 2524 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2525 a[r] = diagA[r]; 2526 if (idx) idx[r] = cstart + diagIdx[r]; 2527 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2528 a[r] = diagA[r]; 2529 if (idx) { 2530 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2531 idx[r] = cstart + diagIdx[r]; 2532 } else idx[r] = offdiagIdx[r]; 2533 } 2534 } else { 2535 a[r] = offdiagA[r]; 2536 if (idx) idx[r] = offdiagIdx[r]; 2537 } 2538 } 2539 ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr); 2540 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2541 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2542 ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr); 2543 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2544 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2545 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2546 PetscFunctionReturn(0); 2547 } 2548 2549 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2550 { 2551 PetscErrorCode ierr; 2552 Mat *dummy; 2553 2554 PetscFunctionBegin; 2555 ierr = 
MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2556 *newmat = *dummy; 2557 ierr = PetscFree(dummy);CHKERRQ(ierr); 2558 PetscFunctionReturn(0); 2559 } 2560 2561 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2562 { 2563 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2564 PetscErrorCode ierr; 2565 2566 PetscFunctionBegin; 2567 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2568 A->factorerrortype = a->A->factorerrortype; 2569 PetscFunctionReturn(0); 2570 } 2571 2572 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2573 { 2574 PetscErrorCode ierr; 2575 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2576 2577 PetscFunctionBegin; 2578 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2579 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2580 if (x->assembled) { 2581 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2582 } else { 2583 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2584 } 2585 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2586 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2587 PetscFunctionReturn(0); 2588 } 2589 2590 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2591 { 2592 PetscFunctionBegin; 2593 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2594 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2595 PetscFunctionReturn(0); 2596 } 2597 2598 /*@ 2599 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2600 2601 Collective on Mat 2602 2603 Input Parameters: 2604 + A - the matrix 2605 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2606 2607 Level: advanced 2608 2609 @*/ 2610 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2611 { 2612 PetscErrorCode ierr; 2613 2614 PetscFunctionBegin; 2615 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2616 PetscFunctionReturn(0); 2617 } 2618 2619 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2620 { 2621 PetscErrorCode ierr; 2622 PetscBool sc = PETSC_FALSE,flg; 2623 2624 PetscFunctionBegin; 2625 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2626 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2627 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2628 if (flg) { 2629 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2630 } 2631 ierr = PetscOptionsTail();CHKERRQ(ierr); 2632 PetscFunctionReturn(0); 2633 } 2634 2635 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2636 { 2637 PetscErrorCode ierr; 2638 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2639 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2640 2641 PetscFunctionBegin; 2642 if (!Y->preallocated) { 2643 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2644 } else if (!aij->nz) { 2645 PetscInt nonew = aij->nonew; 2646 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2647 aij->nonew = nonew; 2648 } 2649 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2650 
PetscFunctionReturn(0); 2651 } 2652 2653 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2654 { 2655 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2656 PetscErrorCode ierr; 2657 2658 PetscFunctionBegin; 2659 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2660 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2661 if (d) { 2662 PetscInt rstart; 2663 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2664 *d += rstart; 2665 2666 } 2667 PetscFunctionReturn(0); 2668 } 2669 2670 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2671 { 2672 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2673 PetscErrorCode ierr; 2674 2675 PetscFunctionBegin; 2676 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2677 PetscFunctionReturn(0); 2678 } 2679 2680 /* -------------------------------------------------------------------*/ 2681 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2682 MatGetRow_MPIAIJ, 2683 MatRestoreRow_MPIAIJ, 2684 MatMult_MPIAIJ, 2685 /* 4*/ MatMultAdd_MPIAIJ, 2686 MatMultTranspose_MPIAIJ, 2687 MatMultTransposeAdd_MPIAIJ, 2688 NULL, 2689 NULL, 2690 NULL, 2691 /*10*/ NULL, 2692 NULL, 2693 NULL, 2694 MatSOR_MPIAIJ, 2695 MatTranspose_MPIAIJ, 2696 /*15*/ MatGetInfo_MPIAIJ, 2697 MatEqual_MPIAIJ, 2698 MatGetDiagonal_MPIAIJ, 2699 MatDiagonalScale_MPIAIJ, 2700 MatNorm_MPIAIJ, 2701 /*20*/ MatAssemblyBegin_MPIAIJ, 2702 MatAssemblyEnd_MPIAIJ, 2703 MatSetOption_MPIAIJ, 2704 MatZeroEntries_MPIAIJ, 2705 /*24*/ MatZeroRows_MPIAIJ, 2706 NULL, 2707 NULL, 2708 NULL, 2709 NULL, 2710 /*29*/ MatSetUp_MPIAIJ, 2711 NULL, 2712 NULL, 2713 MatGetDiagonalBlock_MPIAIJ, 2714 NULL, 2715 /*34*/ MatDuplicate_MPIAIJ, 2716 NULL, 2717 NULL, 2718 NULL, 2719 NULL, 2720 /*39*/ MatAXPY_MPIAIJ, 2721 MatCreateSubMatrices_MPIAIJ, 2722 MatIncreaseOverlap_MPIAIJ, 2723 MatGetValues_MPIAIJ, 2724 MatCopy_MPIAIJ, 2725 /*44*/ MatGetRowMax_MPIAIJ, 2726 MatScale_MPIAIJ, 2727 MatShift_MPIAIJ, 2728 MatDiagonalSet_MPIAIJ, 2729 MatZeroRowsColumns_MPIAIJ, 2730 /*49*/ MatSetRandom_MPIAIJ, 2731 NULL, 2732 NULL, 2733 NULL, 2734 NULL, 2735 /*54*/ MatFDColoringCreate_MPIXAIJ, 2736 NULL, 2737 MatSetUnfactored_MPIAIJ, 2738 MatPermute_MPIAIJ, 2739 NULL, 2740 /*59*/ MatCreateSubMatrix_MPIAIJ, 2741 MatDestroy_MPIAIJ, 2742 MatView_MPIAIJ, 2743 NULL, 2744 NULL, 2745 /*64*/ NULL, 2746 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2747 NULL, 2748 NULL, 2749 NULL, 2750 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2751 MatGetRowMinAbs_MPIAIJ, 2752 NULL, 2753 NULL, 2754 NULL, 2755 NULL, 2756 /*75*/ MatFDColoringApply_AIJ, 2757 MatSetFromOptions_MPIAIJ, 2758 NULL, 2759 NULL, 2760 MatFindZeroDiagonals_MPIAIJ, 2761 /*80*/ NULL, 2762 NULL, 2763 NULL, 2764 /*83*/ MatLoad_MPIAIJ, 2765 MatIsSymmetric_MPIAIJ, 2766 NULL, 2767 NULL, 2768 NULL, 2769 NULL, 2770 /*89*/ NULL, 2771 NULL, 2772 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2773 NULL, 2774 NULL, 2775 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2776 NULL, 2777 NULL, 2778 NULL, 2779 MatBindToCPU_MPIAIJ, 2780 /*99*/ MatProductSetFromOptions_MPIAIJ, 2781 NULL, 2782 NULL, 2783 MatConjugate_MPIAIJ, 2784 NULL, 2785 /*104*/MatSetValuesRow_MPIAIJ, 2786 MatRealPart_MPIAIJ, 2787 MatImaginaryPart_MPIAIJ, 2788 NULL, 2789 NULL, 2790 /*109*/NULL, 2791 NULL, 2792 MatGetRowMin_MPIAIJ, 2793 NULL, 2794 MatMissingDiagonal_MPIAIJ, 2795 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2796 NULL, 2797 MatGetGhosts_MPIAIJ, 2798 NULL, 2799 NULL, 2800 /*119*/MatMultDiagonalBlock_MPIAIJ, 2801 
NULL, 2802 NULL, 2803 NULL, 2804 MatGetMultiProcBlock_MPIAIJ, 2805 /*124*/MatFindNonzeroRows_MPIAIJ, 2806 MatGetColumnReductions_MPIAIJ, 2807 MatInvertBlockDiagonal_MPIAIJ, 2808 MatInvertVariableBlockDiagonal_MPIAIJ, 2809 MatCreateSubMatricesMPI_MPIAIJ, 2810 /*129*/NULL, 2811 NULL, 2812 NULL, 2813 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2814 NULL, 2815 /*134*/NULL, 2816 NULL, 2817 NULL, 2818 NULL, 2819 NULL, 2820 /*139*/MatSetBlockSizes_MPIAIJ, 2821 NULL, 2822 NULL, 2823 MatFDColoringSetUp_MPIXAIJ, 2824 MatFindOffBlockDiagonalEntries_MPIAIJ, 2825 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2826 /*145*/NULL, 2827 NULL, 2828 NULL 2829 }; 2830 2831 /* ----------------------------------------------------------------------------------------*/ 2832 2833 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2834 { 2835 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2836 PetscErrorCode ierr; 2837 2838 PetscFunctionBegin; 2839 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2840 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2841 PetscFunctionReturn(0); 2842 } 2843 2844 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2845 { 2846 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2847 PetscErrorCode ierr; 2848 2849 PetscFunctionBegin; 2850 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2851 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2852 PetscFunctionReturn(0); 2853 } 2854 2855 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2856 { 2857 Mat_MPIAIJ *b; 2858 PetscErrorCode ierr; 2859 PetscMPIInt size; 2860 2861 PetscFunctionBegin; 2862 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2863 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2864 b = (Mat_MPIAIJ*)B->data; 2865 2866 #if defined(PETSC_USE_CTABLE) 2867 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2868 #else 2869 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2870 #endif 2871 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2872 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2873 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2874 2875 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2876 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 2877 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2878 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2879 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 2880 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2881 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2882 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2883 2884 if (!B->preallocated) { 2885 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2886 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2887 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2888 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2889 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2890 } 2891 2892 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2893 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2894 B->preallocated = PETSC_TRUE; 2895 B->was_assembled = PETSC_FALSE; 2896 B->assembled = PETSC_FALSE; 2897 PetscFunctionReturn(0); 2898 } 2899 2900 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2901 { 2902 Mat_MPIAIJ *b; 2903 PetscErrorCode ierr; 2904 2905 PetscFunctionBegin; 2906 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2907 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2908 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2909 b = (Mat_MPIAIJ*)B->data; 2910 2911 #if defined(PETSC_USE_CTABLE) 2912 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2913 #else 2914 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2915 #endif 2916 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2917 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2918 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2919 2920 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2921 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2922 B->preallocated = PETSC_TRUE; 2923 B->was_assembled = PETSC_FALSE; 2924 B->assembled = PETSC_FALSE; 2925 PetscFunctionReturn(0); 2926 } 2927 2928 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2929 { 2930 Mat mat; 2931 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2932 PetscErrorCode ierr; 2933 2934 PetscFunctionBegin; 2935 *newmat = NULL; 2936 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2937 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2938 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2939 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2940 a = (Mat_MPIAIJ*)mat->data; 2941 2942 mat->factortype = matin->factortype; 2943 mat->assembled = matin->assembled; 2944 mat->insertmode = NOT_SET_VALUES; 2945 mat->preallocated = matin->preallocated; 2946 2947 a->size = oldmat->size; 2948 a->rank = oldmat->rank; 2949 a->donotstash = oldmat->donotstash; 2950 a->roworiented = oldmat->roworiented; 2951 a->rowindices = NULL; 2952 a->rowvalues = NULL; 2953 a->getrowactive = PETSC_FALSE; 2954 2955 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2956 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2957 2958 if (oldmat->colmap) { 2959 #if defined(PETSC_USE_CTABLE) 2960 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2961 #else 2962 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2963 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2964 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 2965 #endif 2966 } else a->colmap = NULL; 2967 if (oldmat->garray) { 2968 PetscInt len; 2969 len = oldmat->B->cmap->n; 2970 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2971 
ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2972 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 2973 } else a->garray = NULL; 2974 2975 /* It may happen MatDuplicate is called with a non-assembled matrix 2976 In fact, MatDuplicate only requires the matrix to be preallocated 2977 This may happen inside a DMCreateMatrix_Shell */ 2978 if (oldmat->lvec) { 2979 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2980 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2981 } 2982 if (oldmat->Mvctx) { 2983 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2984 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2985 } 2986 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2987 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2988 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2989 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2990 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2991 *newmat = mat; 2992 PetscFunctionReturn(0); 2993 } 2994 2995 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2996 { 2997 PetscBool isbinary, ishdf5; 2998 PetscErrorCode ierr; 2999 3000 PetscFunctionBegin; 3001 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 3002 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 3003 /* force binary viewer to load .info file if it has not yet done so */ 3004 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3005 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 3006 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 3007 if (isbinary) { 3008 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 3009 } else if (ishdf5) { 3010 #if defined(PETSC_HAVE_HDF5) 3011 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 3012 #else 3013 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3014 #endif 3015 } else { 3016 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 3017 } 3018 PetscFunctionReturn(0); 3019 } 3020 3021 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3022 { 3023 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 3024 PetscInt *rowidxs,*colidxs; 3025 PetscScalar *matvals; 3026 PetscErrorCode ierr; 3027 3028 PetscFunctionBegin; 3029 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3030 3031 /* read in matrix header */ 3032 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3033 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3034 M = header[1]; N = header[2]; nz = header[3]; 3035 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 3036 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N); 3037 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 3038 3039 /* set block sizes from the viewer's .info file */ 3040 ierr = 
MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3041 /* set global sizes if not set already */ 3042 if (mat->rmap->N < 0) mat->rmap->N = M; 3043 if (mat->cmap->N < 0) mat->cmap->N = N; 3044 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3045 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3046 3047 /* check if the matrix sizes are correct */ 3048 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3049 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 3050 3051 /* read in row lengths and build row indices */ 3052 ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr); 3053 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3054 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 3055 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3056 ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr); 3057 if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 3058 /* read in column indices and matrix values */ 3059 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 3060 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 3061 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 3062 /* store matrix indices and values */ 3063 ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr); 3064 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3065 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3066 PetscFunctionReturn(0); 3067 } 3068 3069 /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 3070 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3071 { 3072 PetscErrorCode ierr; 3073 IS iscol_local; 3074 PetscBool isstride; 3075 PetscMPIInt lisstride=0,gisstride; 3076 3077 PetscFunctionBegin; 3078 /* check if we are grabbing all columns*/ 3079 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3080 3081 if (isstride) { 3082 PetscInt start,len,mstart,mlen; 3083 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3084 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3085 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3086 if (mstart == start && mlen-mstart == len) lisstride = 1; 3087 } 3088 3089 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 3090 if (gisstride) { 3091 PetscInt N; 3092 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3093 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr); 3094 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3095 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3096 } else { 3097 PetscInt cbs; 3098 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3099 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3100 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3101 } 3102 3103 *isseq = iscol_local; 3104 PetscFunctionReturn(0); 3105 } 3106 3107 /* 3108 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3109 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3110 3111 Input Parameters: 3112 mat - matrix 3113 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3114 i.e., mat->rstart <= isrow[i] < mat->rend 3115 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3116 i.e., mat->cstart <= iscol[i] < mat->cend 3117 Output Parameter: 3118 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3119 iscol_o - sequential column index set for retrieving mat->B 3120 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3121 */ 3122 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3123 { 3124 PetscErrorCode ierr; 3125 Vec x,cmap; 3126 const PetscInt *is_idx; 3127 PetscScalar *xarray,*cmaparray; 3128 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3129 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3130 Mat B=a->B; 3131 Vec lvec=a->lvec,lcmap; 3132 PetscInt i,cstart,cend,Bn=B->cmap->N; 3133 MPI_Comm comm; 3134 VecScatter Mvctx=a->Mvctx; 3135 3136 PetscFunctionBegin; 3137 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3138 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3139 3140 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3141 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3142 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3143 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3144 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3145 3146 /* Get start indices */ 3147 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3148 isstart -= ncols; 3149 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3150 3151 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3152 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3153 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3154 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3155 for (i=0; i<ncols; i++) { 3156 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3157 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3158 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3159 } 3160 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3161 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3162 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3163 3164 /* Get iscol_d */ 3165 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3166 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3167 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3168 3169 /* Get isrow_d */ 3170 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3171 rstart = mat->rmap->rstart; 3172 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3173 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3174 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3175 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3176 3177 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3178 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3179 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3180 3181 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3182 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3183 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3184 3185 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3186 3187 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3188 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3189 3190 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3191 /* off-process column indices */ 3192 count = 0; 3193 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3194 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3195 3196 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3197 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3198 for (i=0; i<Bn; i++) { 3199 if (PetscRealPart(xarray[i]) > -1.0) { 3200 idx[count] = i; /* local column index in off-diagonal part B */ 3201 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3202 count++; 3203 } 3204 } 3205 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3206 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3207 3208 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3209 /* cannot ensure iscol_o has same blocksize as iscol! 
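       (a plausible explanation, added for clarity: iscol_o keeps only those columns of iscol that land in the
        off-diagonal block B on this process, so any block structure of iscol need not survive the selection)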
*/ 3210 3211 ierr = PetscFree(idx);CHKERRQ(ierr); 3212 *garray = cmap1; 3213 3214 ierr = VecDestroy(&x);CHKERRQ(ierr); 3215 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3216 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3217 PetscFunctionReturn(0); 3218 } 3219 3220 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3221 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3222 { 3223 PetscErrorCode ierr; 3224 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3225 Mat M = NULL; 3226 MPI_Comm comm; 3227 IS iscol_d,isrow_d,iscol_o; 3228 Mat Asub = NULL,Bsub = NULL; 3229 PetscInt n; 3230 3231 PetscFunctionBegin; 3232 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3233 3234 if (call == MAT_REUSE_MATRIX) { 3235 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3236 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3237 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3238 3239 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3240 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3241 3242 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3243 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3244 3245 /* Update diagonal and off-diagonal portions of submat */ 3246 asub = (Mat_MPIAIJ*)(*submat)->data; 3247 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3248 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3249 if (n) { 3250 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3251 } 3252 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3253 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3254 3255 } else { /* call == MAT_INITIAL_MATRIX) */ 3256 const PetscInt *garray; 3257 PetscInt BsubN; 3258 3259 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3260 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3261 3262 /* Create local submatrices Asub and Bsub */ 3263 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3264 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3265 3266 /* Create submatrix M */ 3267 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3268 3269 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3270 asub = (Mat_MPIAIJ*)M->data; 3271 3272 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3273 n = asub->B->cmap->N; 3274 if (BsubN > n) { 3275 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3276 const PetscInt *idx; 3277 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3278 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3279 3280 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3281 j = 0; 3282 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3283 for (i=0; i<n; i++) { 3284 if (j >= BsubN) break; 3285 while (subgarray[i] > garray[j]) j++; 3286 3287 if (subgarray[i] == garray[j]) { 3288 idx_new[i] = idx[j++]; 3289 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3290 } 3291 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3292 3293 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3294 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3295 3296 } else if (BsubN < n) { 3297 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3298 } 3299 3300 ierr = PetscFree(garray);CHKERRQ(ierr); 3301 *submat = M; 3302 3303 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3304 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3305 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3306 3307 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3308 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3309 3310 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3311 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3312 } 3313 PetscFunctionReturn(0); 3314 } 3315 3316 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3317 { 3318 PetscErrorCode ierr; 3319 IS iscol_local=NULL,isrow_d; 3320 PetscInt csize; 3321 PetscInt n,i,j,start,end; 3322 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3323 MPI_Comm comm; 3324 3325 PetscFunctionBegin; 3326 /* If isrow has same processor distribution as mat, 3327 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3328 if (call == MAT_REUSE_MATRIX) { 3329 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3330 if (isrow_d) { 3331 sameRowDist = PETSC_TRUE; 3332 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3333 } else { 3334 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3335 if (iscol_local) { 3336 sameRowDist = PETSC_TRUE; 3337 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3338 } 3339 } 3340 } else { 3341 /* Check if isrow has same processor distribution as mat */ 3342 sameDist[0] = 
PETSC_FALSE; 3343 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3344 if (!n) { 3345 sameDist[0] = PETSC_TRUE; 3346 } else { 3347 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3348 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3349 if (i >= start && j < end) { 3350 sameDist[0] = PETSC_TRUE; 3351 } 3352 } 3353 3354 /* Check if iscol has same processor distribution as mat */ 3355 sameDist[1] = PETSC_FALSE; 3356 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3357 if (!n) { 3358 sameDist[1] = PETSC_TRUE; 3359 } else { 3360 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3361 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3362 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3363 } 3364 3365 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3366 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRMPI(ierr); 3367 sameRowDist = tsameDist[0]; 3368 } 3369 3370 if (sameRowDist) { 3371 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3372 /* isrow and iscol have same processor distribution as mat */ 3373 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3374 PetscFunctionReturn(0); 3375 } else { /* sameRowDist */ 3376 /* isrow has same processor distribution as mat */ 3377 if (call == MAT_INITIAL_MATRIX) { 3378 PetscBool sorted; 3379 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3380 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3381 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3382 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3383 3384 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3385 if (sorted) { 3386 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3387 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3388 PetscFunctionReturn(0); 3389 } 3390 } else { /* call == MAT_REUSE_MATRIX */ 3391 IS iscol_sub; 3392 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3393 if (iscol_sub) { 3394 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3395 PetscFunctionReturn(0); 3396 } 3397 } 3398 } 3399 } 3400 3401 /* General case: iscol -> iscol_local which has global size of iscol */ 3402 if (call == MAT_REUSE_MATRIX) { 3403 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3404 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3405 } else { 3406 if (!iscol_local) { 3407 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3408 } 3409 } 3410 3411 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3412 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3413 3414 if (call == MAT_INITIAL_MATRIX) { 3415 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3416 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3417 } 3418 PetscFunctionReturn(0); 3419 } 3420 3421 /*@C 3422 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3423 and "off-diagonal" part of the matrix in CSR format. 3424 3425 Collective 3426 3427 Input Parameters: 3428 + comm - MPI communicator 3429 . 
A - "diagonal" portion of matrix 3430 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3431 - garray - global index of B columns 3432 3433 Output Parameter: 3434 . mat - the matrix, with input A as its local diagonal matrix 3435 Level: advanced 3436 3437 Notes: 3438 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3439 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3440 3441 .seealso: MatCreateMPIAIJWithSplitArrays() 3442 @*/ 3443 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3444 { 3445 PetscErrorCode ierr; 3446 Mat_MPIAIJ *maij; 3447 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3448 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3449 const PetscScalar *oa; 3450 Mat Bnew; 3451 PetscInt m,n,N; 3452 3453 PetscFunctionBegin; 3454 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3455 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3456 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3457 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3458 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3459 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3460 3461 /* Get global columns of mat */ 3462 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3463 3464 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3465 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3466 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3467 maij = (Mat_MPIAIJ*)(*mat)->data; 3468 3469 (*mat)->preallocated = PETSC_TRUE; 3470 3471 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3472 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3473 3474 /* Set A as diagonal portion of *mat */ 3475 maij->A = A; 3476 3477 nz = oi[m]; 3478 for (i=0; i<nz; i++) { 3479 col = oj[i]; 3480 oj[i] = garray[col]; 3481 } 3482 3483 /* Set Bnew as off-diagonal portion of *mat */ 3484 ierr = MatSeqAIJGetArrayRead(B,&oa);CHKERRQ(ierr); 3485 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew);CHKERRQ(ierr); 3486 ierr = MatSeqAIJRestoreArrayRead(B,&oa);CHKERRQ(ierr); 3487 bnew = (Mat_SeqAIJ*)Bnew->data; 3488 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3489 maij->B = Bnew; 3490 3491 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3492 3493 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3494 b->free_a = PETSC_FALSE; 3495 b->free_ij = PETSC_FALSE; 3496 ierr = MatDestroy(&B);CHKERRQ(ierr); 3497 3498 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3499 bnew->free_a = PETSC_TRUE; 3500 bnew->free_ij = PETSC_TRUE; 3501 3502 /* condense columns of maij->B */ 3503 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3504 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3505 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3506 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3507 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3508 PetscFunctionReturn(0); 
3509 } 3510 3511 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3512 3513 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3514 { 3515 PetscErrorCode ierr; 3516 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3517 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3518 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3519 Mat M,Msub,B=a->B; 3520 MatScalar *aa; 3521 Mat_SeqAIJ *aij; 3522 PetscInt *garray = a->garray,*colsub,Ncols; 3523 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3524 IS iscol_sub,iscmap; 3525 const PetscInt *is_idx,*cmap; 3526 PetscBool allcolumns=PETSC_FALSE; 3527 MPI_Comm comm; 3528 3529 PetscFunctionBegin; 3530 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3531 if (call == MAT_REUSE_MATRIX) { 3532 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3533 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3534 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3535 3536 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3537 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3538 3539 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3540 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3541 3542 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3543 3544 } else { /* call == MAT_INITIAL_MATRIX) */ 3545 PetscBool flg; 3546 3547 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3548 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3549 3550 /* (1) iscol -> nonscalable iscol_local */ 3551 /* Check for special case: each processor gets entire matrix columns */ 3552 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3553 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3554 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 3555 if (allcolumns) { 3556 iscol_sub = iscol_local; 3557 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3558 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3559 3560 } else { 3561 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3562 PetscInt *idx,*cmap1,k; 3563 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3564 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3565 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3566 count = 0; 3567 k = 0; 3568 for (i=0; i<Ncols; i++) { 3569 j = is_idx[i]; 3570 if (j >= cstart && j < cend) { 3571 /* diagonal part of mat */ 3572 idx[count] = j; 3573 cmap1[count++] = i; /* column index in submat */ 3574 } else if (Bn) { 3575 /* off-diagonal part of mat */ 3576 if (j == garray[k]) { 3577 idx[count] = j; 3578 cmap1[count++] = i; /* column index in submat */ 3579 } else if (j > garray[k]) { 3580 while (j > garray[k] && k < Bn-1) k++; 3581 if (j == garray[k]) { 3582 idx[count] = j; 3583 cmap1[count++] = i; /* column index in submat */ 3584 } 3585 } 3586 } 3587 } 3588 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3589 3590 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3591 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3592 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3593 3594 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3595 } 3596 3597 /* (3) Create sequential Msub */ 3598 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3599 } 3600 3601 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3602 aij = (Mat_SeqAIJ*)(Msub)->data; 3603 ii = aij->i; 3604 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3605 3606 /* 3607 m - number of local rows 3608 Ncols - number of columns (same on all processors) 3609 rstart - first row in new global matrix generated 3610 */ 3611 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3612 3613 if (call == MAT_INITIAL_MATRIX) { 3614 /* (4) Create parallel newmat */ 3615 PetscMPIInt rank,size; 3616 PetscInt csize; 3617 3618 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 3619 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 3620 3621 /* 3622 Determine the number of non-zeros in the diagonal and off-diagonal 3623 portions of the matrix in order to do correct preallocation 3624 */ 3625 3626 /* first get start and end of "diagonal" columns */ 3627 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3628 if (csize == PETSC_DECIDE) { 3629 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3630 if (mglobal == Ncols) { /* square matrix */ 3631 nlocal = m; 3632 } else { 3633 nlocal = Ncols/size + ((Ncols % size) > rank); 3634 } 3635 } else { 3636 nlocal = csize; 3637 } 3638 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3639 rstart = rend - nlocal; 3640 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3641 3642 /* next, compute all the lengths */ 3643 jj = aij->j; 3644 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3645 olens = dlens + m; 3646 for (i=0; i<m; i++) { 3647 jend = ii[i+1] - ii[i]; 3648 olen = 0; 3649 dlen = 0; 3650 for (j=0; j<jend; j++) { 3651 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3652 else dlen++; 3653 jj++; 3654 } 3655 olens[i] = olen; 3656 dlens[i] = dlen; 3657 } 3658 3659 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3660 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3661 3662 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3663 ierr = 
MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 3664 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3665 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3666 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3667 ierr = PetscFree(dlens);CHKERRQ(ierr); 3668 3669 } else { /* call == MAT_REUSE_MATRIX */ 3670 M = *newmat; 3671 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3672 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3673 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3674 /* 3675 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3676 rather than the slower MatSetValues(). 3677 */ 3678 M->was_assembled = PETSC_TRUE; 3679 M->assembled = PETSC_FALSE; 3680 } 3681 3682 /* (5) Set values of Msub to *newmat */ 3683 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3684 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3685 3686 jj = aij->j; 3687 ierr = MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr); 3688 for (i=0; i<m; i++) { 3689 row = rstart + i; 3690 nz = ii[i+1] - ii[i]; 3691 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3692 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3693 jj += nz; aa += nz; 3694 } 3695 ierr = MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr); 3696 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3697 3698 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3699 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3700 3701 ierr = PetscFree(colsub);CHKERRQ(ierr); 3702 3703 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3704 if (call == MAT_INITIAL_MATRIX) { 3705 *newmat = M; 3706 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3707 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3708 3709 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3710 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3711 3712 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3713 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3714 3715 if (iscol_local) { 3716 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3717 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3718 } 3719 } 3720 PetscFunctionReturn(0); 3721 } 3722 3723 /* 3724 Not great since it makes two copies of the submatrix, first an SeqAIJ 3725 in local and then by concatenating the local matrices the end result. 3726 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3727 3728 Note: This requires a sequential iscol with all indices. 
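
   For illustration (a sketch, not part of the original comment): this routine is normally reached
   through the public interface, where the second call reuses the cached "SubMatrix" object:
     MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&newmat);
     MatCreateSubMatrix(mat,isrow,iscol,MAT_REUSE_MATRIX,&newmat);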
3729 */ 3730 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3731 { 3732 PetscErrorCode ierr; 3733 PetscMPIInt rank,size; 3734 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3735 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3736 Mat M,Mreuse; 3737 MatScalar *aa,*vwork; 3738 MPI_Comm comm; 3739 Mat_SeqAIJ *aij; 3740 PetscBool colflag,allcolumns=PETSC_FALSE; 3741 3742 PetscFunctionBegin; 3743 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3744 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 3745 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 3746 3747 /* Check for special case: each processor gets entire matrix columns */ 3748 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3749 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3750 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3751 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 3752 3753 if (call == MAT_REUSE_MATRIX) { 3754 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3755 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3756 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3757 } else { 3758 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3759 } 3760 3761 /* 3762 m - number of local rows 3763 n - number of columns (same on all processors) 3764 rstart - first row in new global matrix generated 3765 */ 3766 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3767 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3768 if (call == MAT_INITIAL_MATRIX) { 3769 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3770 ii = aij->i; 3771 jj = aij->j; 3772 3773 /* 3774 Determine the number of non-zeros in the diagonal and off-diagonal 3775 portions of the matrix in order to do correct preallocation 3776 */ 3777 3778 /* first get start and end of "diagonal" columns */ 3779 if (csize == PETSC_DECIDE) { 3780 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3781 if (mglobal == n) { /* square matrix */ 3782 nlocal = m; 3783 } else { 3784 nlocal = n/size + ((n % size) > rank); 3785 } 3786 } else { 3787 nlocal = csize; 3788 } 3789 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3790 rstart = rend - nlocal; 3791 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3792 3793 /* next, compute all the lengths */ 3794 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3795 olens = dlens + m; 3796 for (i=0; i<m; i++) { 3797 jend = ii[i+1] - ii[i]; 3798 olen = 0; 3799 dlen = 0; 3800 for (j=0; j<jend; j++) { 3801 if (*jj < rstart || *jj >= rend) olen++; 3802 else dlen++; 3803 jj++; 3804 } 3805 olens[i] = olen; 3806 dlens[i] = dlen; 3807 } 3808 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3809 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3810 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3811 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3812 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3813 ierr = PetscFree(dlens);CHKERRQ(ierr); 3814 } else { 3815 PetscInt ml,nl; 3816 3817 M = *newmat; 3818 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3819 
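    /* in the reuse case the previously returned parallel matrix is refreshed in place,
       so its local row count must match that of the cached sequential submatrix */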
if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3820 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3821 /* 3822 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3823 rather than the slower MatSetValues(). 3824 */ 3825 M->was_assembled = PETSC_TRUE; 3826 M->assembled = PETSC_FALSE; 3827 } 3828 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3829 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3830 ii = aij->i; 3831 jj = aij->j; 3832 3833 /* trigger copy to CPU if needed */ 3834 ierr = MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr); 3835 for (i=0; i<m; i++) { 3836 row = rstart + i; 3837 nz = ii[i+1] - ii[i]; 3838 cwork = jj; jj += nz; 3839 vwork = aa; aa += nz; 3840 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3841 } 3842 ierr = MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr); 3843 3844 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3845 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3846 *newmat = M; 3847 3848 /* save submatrix used in processor for next request */ 3849 if (call == MAT_INITIAL_MATRIX) { 3850 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3851 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3852 } 3853 PetscFunctionReturn(0); 3854 } 3855 3856 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3857 { 3858 PetscInt m,cstart, cend,j,nnz,i,d; 3859 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3860 const PetscInt *JJ; 3861 PetscErrorCode ierr; 3862 PetscBool nooffprocentries; 3863 3864 PetscFunctionBegin; 3865 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3866 3867 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3868 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3869 m = B->rmap->n; 3870 cstart = B->cmap->rstart; 3871 cend = B->cmap->rend; 3872 rstart = B->rmap->rstart; 3873 3874 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3875 3876 if (PetscDefined(USE_DEBUG)) { 3877 for (i=0; i<m; i++) { 3878 nnz = Ii[i+1]- Ii[i]; 3879 JJ = J + Ii[i]; 3880 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3881 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3882 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3883 } 3884 } 3885 3886 for (i=0; i<m; i++) { 3887 nnz = Ii[i+1]- Ii[i]; 3888 JJ = J + Ii[i]; 3889 nnz_max = PetscMax(nnz_max,nnz); 3890 d = 0; 3891 for (j=0; j<nnz; j++) { 3892 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3893 } 3894 d_nnz[i] = d; 3895 o_nnz[i] = nnz - d; 3896 } 3897 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3898 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3899 3900 for (i=0; i<m; i++) { 3901 ii = i + rstart; 3902 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? 
v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 3903 } 3904 nooffprocentries = B->nooffprocentries; 3905 B->nooffprocentries = PETSC_TRUE; 3906 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3907 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3908 B->nooffprocentries = nooffprocentries; 3909 3910 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3911 PetscFunctionReturn(0); 3912 } 3913 3914 /*@ 3915 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3916 (the default parallel PETSc format). 3917 3918 Collective 3919 3920 Input Parameters: 3921 + B - the matrix 3922 . i - the indices into j for the start of each local row (starts with zero) 3923 . j - the column indices for each local row (starts with zero) 3924 - v - optional values in the matrix 3925 3926 Level: developer 3927 3928 Notes: 3929 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3930 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3931 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3932 3933 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3934 3935 The format which is used for the sparse matrix input, is equivalent to a 3936 row-major ordering.. i.e for the following matrix, the input data expected is 3937 as shown 3938 3939 $ 1 0 0 3940 $ 2 0 3 P0 3941 $ ------- 3942 $ 4 5 6 P1 3943 $ 3944 $ Process0 [P0]: rows_owned=[0,1] 3945 $ i = {0,1,3} [size = nrow+1 = 2+1] 3946 $ j = {0,0,2} [size = 3] 3947 $ v = {1,2,3} [size = 3] 3948 $ 3949 $ Process1 [P1]: rows_owned=[2] 3950 $ i = {0,3} [size = nrow+1 = 1+1] 3951 $ j = {0,1,2} [size = 3] 3952 $ v = {4,5,6} [size = 3] 3953 3954 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3955 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3956 @*/ 3957 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3958 { 3959 PetscErrorCode ierr; 3960 3961 PetscFunctionBegin; 3962 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3963 PetscFunctionReturn(0); 3964 } 3965 3966 /*@C 3967 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3968 (the default parallel PETSc format). For good matrix assembly performance 3969 the user should preallocate the matrix storage by setting the parameters 3970 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3971 performance can be increased by more than a factor of 50. 3972 3973 Collective 3974 3975 Input Parameters: 3976 + B - the matrix 3977 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3978 (same value is used for all local rows) 3979 . d_nnz - array containing the number of nonzeros in the various rows of the 3980 DIAGONAL portion of the local submatrix (possibly different for each row) 3981 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3982 The size of this array is equal to the number of local rows, i.e 'm'. 3983 For matrices that will be factored, you must leave room for (and set) 3984 the diagonal entry even if it is zero. 3985 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3986 submatrix (same value is used for all local rows). 3987 - o_nnz - array containing the number of nonzeros in the various rows of the 3988 OFF-DIAGONAL portion of the local submatrix (possibly different for 3989 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3990 structure. The size of this array is equal to the number 3991 of local rows, i.e 'm'. 3992 3993 If the *_nnz parameter is given then the *_nz parameter is ignored 3994 3995 The AIJ format (also called the Yale sparse matrix format or 3996 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3997 storage. The stored row and column indices begin with zero. 3998 See Users-Manual: ch_mat for details. 3999 4000 The parallel matrix is partitioned such that the first m0 rows belong to 4001 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4002 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 4003 4004 The DIAGONAL portion of the local submatrix of a processor can be defined 4005 as the submatrix which is obtained by extraction the part corresponding to 4006 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4007 first row that belongs to the processor, r2 is the last row belonging to 4008 the this processor, and c1-c2 is range of indices of the local part of a 4009 vector suitable for applying the matrix to. This is an mxn matrix. In the 4010 common case of a square matrix, the row and column ranges are the same and 4011 the DIAGONAL part is also square. The remaining portion of the local 4012 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4013 4014 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4015 4016 You can call MatGetInfo() to get information on how effective the preallocation was; 4017 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4018 You can also run with the option -info and look for messages with the string 4019 malloc in them to see if additional memory allocation was needed. 4020 4021 Example usage: 4022 4023 Consider the following 8x8 matrix with 34 non-zero values, that is 4024 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4025 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4026 as follows: 4027 4028 .vb 4029 1 2 0 | 0 3 0 | 0 4 4030 Proc0 0 5 6 | 7 0 0 | 8 0 4031 9 0 10 | 11 0 0 | 12 0 4032 ------------------------------------- 4033 13 0 14 | 15 16 17 | 0 0 4034 Proc1 0 18 0 | 19 20 21 | 0 0 4035 0 0 0 | 22 23 0 | 24 0 4036 ------------------------------------- 4037 Proc2 25 26 27 | 0 0 28 | 29 0 4038 30 0 0 | 31 32 33 | 0 34 4039 .ve 4040 4041 This can be represented as a collection of submatrices as: 4042 4043 .vb 4044 A B C 4045 D E F 4046 G H I 4047 .ve 4048 4049 Where the submatrices A,B,C are owned by proc0, D,E,F are 4050 owned by proc1, G,H,I are owned by proc2. 4051 4052 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4053 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4054 The 'M','N' parameters are 8,8, and have the same values on all procs. 4055 4056 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4057 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4058 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4059 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4060 part as SeqAIJ matrices. 
   For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over the local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
   in standard CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and the i indices are offsets into the local j array.

   The format used for the sparse matrix input is equivalent to a
   row-major ordering; i.e., for the following matrix, the input data expected is
   as shown:

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1 = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1 = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

   Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows
   in standard CSR format. Only the numerical values are updated; the sparsity pattern (the index
   arrays) must be identical to the one used to create the matrix.

   Collective

   Input Parameters:
+  mat - the matrix
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
.
J - column indices 4191 - v - matrix values 4192 4193 Level: intermediate 4194 4195 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4196 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4197 @*/ 4198 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4199 { 4200 PetscErrorCode ierr; 4201 PetscInt cstart,nnz,i,j; 4202 PetscInt *ld; 4203 PetscBool nooffprocentries; 4204 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4205 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data; 4206 PetscScalar *ad = Ad->a, *ao = Ao->a; 4207 const PetscInt *Adi = Ad->i; 4208 PetscInt ldi,Iii,md; 4209 4210 PetscFunctionBegin; 4211 if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4212 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4213 if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4214 if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4215 4216 cstart = mat->cmap->rstart; 4217 if (!Aij->ld) { 4218 /* count number of entries below block diagonal */ 4219 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 4220 Aij->ld = ld; 4221 for (i=0; i<m; i++) { 4222 nnz = Ii[i+1]- Ii[i]; 4223 j = 0; 4224 while (J[j] < cstart && j < nnz) {j++;} 4225 J += nnz; 4226 ld[i] = j; 4227 } 4228 } else { 4229 ld = Aij->ld; 4230 } 4231 4232 for (i=0; i<m; i++) { 4233 nnz = Ii[i+1]- Ii[i]; 4234 Iii = Ii[i]; 4235 ldi = ld[i]; 4236 md = Adi[i+1]-Adi[i]; 4237 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr); 4238 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr); 4239 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr); 4240 ad += md; 4241 ao += nnz - md; 4242 } 4243 nooffprocentries = mat->nooffprocentries; 4244 mat->nooffprocentries = PETSC_TRUE; 4245 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr); 4246 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr); 4247 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 4248 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4249 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4250 mat->nooffprocentries = nooffprocentries; 4251 PetscFunctionReturn(0); 4252 } 4253 4254 /*@C 4255 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4256 (the default parallel PETSc format). For good matrix assembly performance 4257 the user should preallocate the matrix storage by setting the parameters 4258 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4259 performance can be increased by more than a factor of 50. 4260 4261 Collective 4262 4263 Input Parameters: 4264 + comm - MPI communicator 4265 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4266 This value should be the same as the local size used in creating the 4267 y vector for the matrix-vector product y = Ax. 4268 . n - This value should be the same as the local size used in creating the 4269 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4270 calculated if N is given) For square matrices n is almost always m. 4271 . 
M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4272 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4273 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4274 (same value is used for all local rows) 4275 . d_nnz - array containing the number of nonzeros in the various rows of the 4276 DIAGONAL portion of the local submatrix (possibly different for each row) 4277 or NULL, if d_nz is used to specify the nonzero structure. 4278 The size of this array is equal to the number of local rows, i.e 'm'. 4279 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4280 submatrix (same value is used for all local rows). 4281 - o_nnz - array containing the number of nonzeros in the various rows of the 4282 OFF-DIAGONAL portion of the local submatrix (possibly different for 4283 each row) or NULL, if o_nz is used to specify the nonzero 4284 structure. The size of this array is equal to the number 4285 of local rows, i.e 'm'. 4286 4287 Output Parameter: 4288 . A - the matrix 4289 4290 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4291 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4292 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4293 4294 Notes: 4295 If the *_nnz parameter is given then the *_nz parameter is ignored 4296 4297 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4298 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4299 storage requirements for this matrix. 4300 4301 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4302 processor than it must be used on all processors that share the object for 4303 that argument. 4304 4305 The user MUST specify either the local or global matrix dimensions 4306 (possibly both). 4307 4308 The parallel matrix is partitioned across processors such that the 4309 first m0 rows belong to process 0, the next m1 rows belong to 4310 process 1, the next m2 rows belong to process 2 etc.. where 4311 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4312 values corresponding to [m x N] submatrix. 4313 4314 The columns are logically partitioned with the n0 columns belonging 4315 to 0th partition, the next n1 columns belonging to the next 4316 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4317 4318 The DIAGONAL portion of the local submatrix on any given processor 4319 is the submatrix corresponding to the rows and columns m,n 4320 corresponding to the given processor. i.e diagonal matrix on 4321 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4322 etc. The remaining portion of the local submatrix [m x (N-n)] 4323 constitute the OFF-DIAGONAL portion. The example below better 4324 illustrates this concept. 4325 4326 For a square global matrix we define each processor's diagonal portion 4327 to be its local rows and the corresponding columns (a square submatrix); 4328 each processor's off-diagonal portion encompasses the remainder of the 4329 local matrix (a rectangular submatrix). 4330 4331 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4332 4333 When calling this routine with a single process communicator, a matrix of 4334 type SEQAIJ is returned. 
   If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism
.vb
     MatCreate(...,&A);
     MatSetType(A,MATMPIAIJ);
     MatSetSizes(A, m,n,M,N);
     MatMPIAIJSetPreallocation(A,...);
.ve

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode - Do not use inodes
-  -mat_inode_limit <limit> - Sets inode limit (max limit=5)

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let us assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over the local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.
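
   A minimal illustrative sketch (not part of the original manual page) of creating such a matrix;
   the local sizes and the d_nnz/o_nnz values are assumptions taken from the 8x8 example above
   (here, the values for proc0):
.vb
     Mat      A;
     PetscInt m = 3, n = 3;
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
     MatCreateAIJ(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,d_nnz,0,o_nnz,&A);
     /* insert entries with MatSetValues() using global row/column indices, then assemble */
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve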

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@C
     MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix

   Not collective

   Input Parameter:
.  A - The MPIAIJ matrix

   Output Parameters:
+  Ad - The local diagonal block as a SeqAIJ matrix
.  Ao - The local off-diagonal block as a SeqAIJ matrix
-  colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix

   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
   in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
   local column numbers to global column numbers in the original matrix.
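
   For example (a minimal sketch; A is assumed to be an assembled MATMPIAIJ matrix, and the loop body is
   illustrative only), the columns of the off-diagonal block can be mapped back to global indices with
.vb
     Mat            Ad,Ao;
     const PetscInt *colmap,*cols;
     PetscInt       j,ncols;

     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
     ierr = MatGetRow(Ao,0,&ncols,&cols,NULL);CHKERRQ(ierr);   /* first local row of the off-diagonal block */
     for (j=0; j<ncols; j++) {
       ierr = PetscPrintf(PETSC_COMM_SELF,"global column %D\n",colmap[cols[j]]);CHKERRQ(ierr);
     }
     ierr = MatRestoreRow(Ao,0,&ncols,&cols,NULL);CHKERRQ(ierr);
.ve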
4463 4464 Level: intermediate 4465 4466 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ 4467 @*/ 4468 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4469 { 4470 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4471 PetscBool flg; 4472 PetscErrorCode ierr; 4473 4474 PetscFunctionBegin; 4475 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4476 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4477 if (Ad) *Ad = a->A; 4478 if (Ao) *Ao = a->B; 4479 if (colmap) *colmap = a->garray; 4480 PetscFunctionReturn(0); 4481 } 4482 4483 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4484 { 4485 PetscErrorCode ierr; 4486 PetscInt m,N,i,rstart,nnz,Ii; 4487 PetscInt *indx; 4488 PetscScalar *values; 4489 MatType rootType; 4490 4491 PetscFunctionBegin; 4492 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4493 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4494 PetscInt *dnz,*onz,sum,bs,cbs; 4495 4496 if (n == PETSC_DECIDE) { 4497 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4498 } 4499 /* Check sum(n) = N */ 4500 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 4501 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4502 4503 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 4504 rstart -= m; 4505 4506 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4507 for (i=0; i<m; i++) { 4508 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4509 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4510 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4511 } 4512 4513 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4514 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4515 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4516 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4517 ierr = MatGetRootType_Private(inmat,&rootType);CHKERRQ(ierr); 4518 ierr = MatSetType(*outmat,rootType);CHKERRQ(ierr); 4519 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4520 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4521 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4522 ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 4523 } 4524 4525 /* numeric phase */ 4526 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4527 for (i=0; i<m; i++) { 4528 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4529 Ii = i + rstart; 4530 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4531 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4532 } 4533 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4534 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4535 PetscFunctionReturn(0); 4536 } 4537 4538 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4539 { 4540 PetscErrorCode ierr; 4541 PetscMPIInt rank; 4542 PetscInt m,N,i,rstart,nnz; 4543 size_t len; 4544 const PetscInt *indx; 4545 PetscViewer out; 4546 char *name; 4547 Mat B; 4548 const PetscScalar *values; 4549 4550 PetscFunctionBegin; 4551 ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr); 4552 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 4553 /* 
Should this be the type of the diagonal block of A? */ 4554 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4555 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4556 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4557 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4558 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4559 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 4560 for (i=0; i<m; i++) { 4561 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4562 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4563 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4564 } 4565 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4566 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4567 4568 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr); 4569 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4570 ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr); 4571 ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr); 4572 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4573 ierr = PetscFree(name);CHKERRQ(ierr); 4574 ierr = MatView(B,out);CHKERRQ(ierr); 4575 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4576 ierr = MatDestroy(&B);CHKERRQ(ierr); 4577 PetscFunctionReturn(0); 4578 } 4579 4580 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4581 { 4582 PetscErrorCode ierr; 4583 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4584 4585 PetscFunctionBegin; 4586 if (!merge) PetscFunctionReturn(0); 4587 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4588 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4589 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4590 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4591 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4592 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4593 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4594 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4595 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4596 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4597 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4598 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4599 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4600 ierr = PetscFree(merge);CHKERRQ(ierr); 4601 PetscFunctionReturn(0); 4602 } 4603 4604 #include <../src/mat/utils/freespace.h> 4605 #include <petscbt.h> 4606 4607 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4608 { 4609 PetscErrorCode ierr; 4610 MPI_Comm comm; 4611 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4612 PetscMPIInt size,rank,taga,*len_s; 4613 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4614 PetscInt proc,m; 4615 PetscInt **buf_ri,**buf_rj; 4616 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4617 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4618 MPI_Request *s_waits,*r_waits; 4619 MPI_Status *status; 4620 MatScalar *aa=a->a; 4621 MatScalar **abuf_r,*ba_i; 4622 Mat_Merge_SeqsToMPI *merge; 4623 PetscContainer container; 4624 4625 PetscFunctionBegin; 4626 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4627 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4628 4629 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4630 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 4631 4632 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4633 if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created 
from MatCreateMPIAIJSumSeqAIJSymbolic"); 4634 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4635 4636 bi = merge->bi; 4637 bj = merge->bj; 4638 buf_ri = merge->buf_ri; 4639 buf_rj = merge->buf_rj; 4640 4641 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4642 owners = merge->rowmap->range; 4643 len_s = merge->len_s; 4644 4645 /* send and recv matrix values */ 4646 /*-----------------------------*/ 4647 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4648 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4649 4650 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4651 for (proc=0,k=0; proc<size; proc++) { 4652 if (!len_s[proc]) continue; 4653 i = owners[proc]; 4654 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRMPI(ierr); 4655 k++; 4656 } 4657 4658 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRMPI(ierr);} 4659 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRMPI(ierr);} 4660 ierr = PetscFree(status);CHKERRQ(ierr); 4661 4662 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4663 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4664 4665 /* insert mat values of mpimat */ 4666 /*----------------------------*/ 4667 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4668 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4669 4670 for (k=0; k<merge->nrecv; k++) { 4671 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4672 nrows = *(buf_ri_k[k]); 4673 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4674 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4675 } 4676 4677 /* set values of ba */ 4678 m = merge->rowmap->n; 4679 for (i=0; i<m; i++) { 4680 arow = owners[rank] + i; 4681 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4682 bnzi = bi[i+1] - bi[i]; 4683 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4684 4685 /* add local non-zero vals of this proc's seqmat into ba */ 4686 anzi = ai[arow+1] - ai[arow]; 4687 aj = a->j + ai[arow]; 4688 aa = a->a + ai[arow]; 4689 nextaj = 0; 4690 for (j=0; nextaj<anzi; j++) { 4691 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4692 ba_i[j] += aa[nextaj++]; 4693 } 4694 } 4695 4696 /* add received vals into ba */ 4697 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4698 /* i-th row */ 4699 if (i == *nextrow[k]) { 4700 anzi = *(nextai[k]+1) - *nextai[k]; 4701 aj = buf_rj[k] + *(nextai[k]); 4702 aa = abuf_r[k] + *(nextai[k]); 4703 nextaj = 0; 4704 for (j=0; nextaj<anzi; j++) { 4705 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4706 ba_i[j] += aa[nextaj++]; 4707 } 4708 } 4709 nextrow[k]++; nextai[k]++; 4710 } 4711 } 4712 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4713 } 4714 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4715 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4716 4717 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4718 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4719 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4720 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4721 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4722 PetscFunctionReturn(0); 4723 } 4724 4725 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4726 { 4727 PetscErrorCode 
ierr; 4728 Mat B_mpi; 4729 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4730 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4731 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4732 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4733 PetscInt len,proc,*dnz,*onz,bs,cbs; 4734 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4735 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4736 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4737 MPI_Status *status; 4738 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4739 PetscBT lnkbt; 4740 Mat_Merge_SeqsToMPI *merge; 4741 PetscContainer container; 4742 4743 PetscFunctionBegin; 4744 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4745 4746 /* make sure it is a PETSc comm */ 4747 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4748 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4749 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 4750 4751 ierr = PetscNew(&merge);CHKERRQ(ierr); 4752 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4753 4754 /* determine row ownership */ 4755 /*---------------------------------------------------------*/ 4756 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4757 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4758 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4759 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4760 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4761 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4762 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4763 4764 m = merge->rowmap->n; 4765 owners = merge->rowmap->range; 4766 4767 /* determine the number of messages to send, their lengths */ 4768 /*---------------------------------------------------------*/ 4769 len_s = merge->len_s; 4770 4771 len = 0; /* length of buf_si[] */ 4772 merge->nsend = 0; 4773 for (proc=0; proc<size; proc++) { 4774 len_si[proc] = 0; 4775 if (proc == rank) { 4776 len_s[proc] = 0; 4777 } else { 4778 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4779 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4780 } 4781 if (len_s[proc]) { 4782 merge->nsend++; 4783 nrows = 0; 4784 for (i=owners[proc]; i<owners[proc+1]; i++) { 4785 if (ai[i+1] > ai[i]) nrows++; 4786 } 4787 len_si[proc] = 2*(nrows+1); 4788 len += len_si[proc]; 4789 } 4790 } 4791 4792 /* determine the number and length of messages to receive for ij-structure */ 4793 /*-------------------------------------------------------------------------*/ 4794 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4795 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4796 4797 /* post the Irecv of j-structure */ 4798 /*-------------------------------*/ 4799 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4800 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4801 4802 /* post the Isend of j-structure */ 4803 /*--------------------------------*/ 4804 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4805 4806 for (proc=0, k=0; proc<size; proc++) { 4807 if (!len_s[proc]) continue; 4808 i = owners[proc]; 4809 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRMPI(ierr); 4810 k++; 4811 } 4812 4813 /* receives and sends of j-structure are complete */ 4814 
/*------------------------------------------------*/ 4815 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRMPI(ierr);} 4816 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRMPI(ierr);} 4817 4818 /* send and recv i-structure */ 4819 /*---------------------------*/ 4820 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4821 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4822 4823 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4824 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4825 for (proc=0,k=0; proc<size; proc++) { 4826 if (!len_s[proc]) continue; 4827 /* form outgoing message for i-structure: 4828 buf_si[0]: nrows to be sent 4829 [1:nrows]: row index (global) 4830 [nrows+1:2*nrows+1]: i-structure index 4831 */ 4832 /*-------------------------------------------*/ 4833 nrows = len_si[proc]/2 - 1; 4834 buf_si_i = buf_si + nrows+1; 4835 buf_si[0] = nrows; 4836 buf_si_i[0] = 0; 4837 nrows = 0; 4838 for (i=owners[proc]; i<owners[proc+1]; i++) { 4839 anzi = ai[i+1] - ai[i]; 4840 if (anzi) { 4841 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4842 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4843 nrows++; 4844 } 4845 } 4846 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRMPI(ierr); 4847 k++; 4848 buf_si += len_si[proc]; 4849 } 4850 4851 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRMPI(ierr);} 4852 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRMPI(ierr);} 4853 4854 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4855 for (i=0; i<merge->nrecv; i++) { 4856 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4857 } 4858 4859 ierr = PetscFree(len_si);CHKERRQ(ierr); 4860 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4861 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4862 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4863 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4864 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4865 ierr = PetscFree(status);CHKERRQ(ierr); 4866 4867 /* compute a local seq matrix in each processor */ 4868 /*----------------------------------------------*/ 4869 /* allocate bi array and free space for accumulating nonzero column info */ 4870 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4871 bi[0] = 0; 4872 4873 /* create and initialize a linked list */ 4874 nlnk = N+1; 4875 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4876 4877 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4878 len = ai[owners[rank+1]] - ai[owners[rank]]; 4879 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4880 4881 current_space = free_space; 4882 4883 /* determine symbolic info for each local row */ 4884 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4885 4886 for (k=0; k<merge->nrecv; k++) { 4887 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4888 nrows = *buf_ri_k[k]; 4889 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4890 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4891 } 4892 4893 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4894 len = 0; 4895 for (i=0; i<m; i++) { 4896 bnzi = 0; 4897 /* add local non-zero cols of this proc's 
seqmat into lnk */ 4898 arow = owners[rank] + i; 4899 anzi = ai[arow+1] - ai[arow]; 4900 aj = a->j + ai[arow]; 4901 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4902 bnzi += nlnk; 4903 /* add received col data into lnk */ 4904 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4905 if (i == *nextrow[k]) { /* i-th row */ 4906 anzi = *(nextai[k]+1) - *nextai[k]; 4907 aj = buf_rj[k] + *nextai[k]; 4908 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4909 bnzi += nlnk; 4910 nextrow[k]++; nextai[k]++; 4911 } 4912 } 4913 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4914 4915 /* if free space is not available, make more free space */ 4916 if (current_space->local_remaining<bnzi) { 4917 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4918 nspacedouble++; 4919 } 4920 /* copy data into free space, then initialize lnk */ 4921 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4922 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4923 4924 current_space->array += bnzi; 4925 current_space->local_used += bnzi; 4926 current_space->local_remaining -= bnzi; 4927 4928 bi[i+1] = bi[i] + bnzi; 4929 } 4930 4931 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4932 4933 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4934 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4935 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4936 4937 /* create symbolic parallel matrix B_mpi */ 4938 /*---------------------------------------*/ 4939 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4940 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4941 if (n==PETSC_DECIDE) { 4942 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4943 } else { 4944 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4945 } 4946 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4947 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4948 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4949 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4950 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4951 4952 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4953 B_mpi->assembled = PETSC_FALSE; 4954 merge->bi = bi; 4955 merge->bj = bj; 4956 merge->buf_ri = buf_ri; 4957 merge->buf_rj = buf_rj; 4958 merge->coi = NULL; 4959 merge->coj = NULL; 4960 merge->owners_co = NULL; 4961 4962 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4963 4964 /* attach the supporting struct to B_mpi for reuse */ 4965 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4966 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4967 ierr = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr); 4968 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4969 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4970 *mpimat = B_mpi; 4971 4972 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4973 PetscFunctionReturn(0); 4974 } 4975 4976 /*@C 4977 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4978 matrices from each processor 4979 4980 Collective 4981 4982 Input Parameters: 4983 + comm - the communicators the parallel matrix will live on 4984 . seqmat - the input sequential matrices 4985 . 
m - number of local rows (or PETSC_DECIDE) 4986 . n - number of local columns (or PETSC_DECIDE) 4987 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4988 4989 Output Parameter: 4990 . mpimat - the parallel matrix generated 4991 4992 Level: advanced 4993 4994 Notes: 4995 The dimensions of the sequential matrix in each processor MUST be the same. 4996 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4997 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4998 @*/ 4999 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 5000 { 5001 PetscErrorCode ierr; 5002 PetscMPIInt size; 5003 5004 PetscFunctionBegin; 5005 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 5006 if (size == 1) { 5007 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5008 if (scall == MAT_INITIAL_MATRIX) { 5009 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 5010 } else { 5011 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5012 } 5013 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5014 PetscFunctionReturn(0); 5015 } 5016 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5017 if (scall == MAT_INITIAL_MATRIX) { 5018 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5019 } 5020 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5021 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5022 PetscFunctionReturn(0); 5023 } 5024 5025 /*@ 5026 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5027 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5028 with MatGetSize() 5029 5030 Not Collective 5031 5032 Input Parameters: 5033 + A - the matrix 5034 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5035 5036 Output Parameter: 5037 . A_loc - the local sequential matrix generated 5038 5039 Level: developer 5040 5041 Notes: 5042 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 5043 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 5044 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 5045 modify the values of the returned A_loc. 
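
   For example (a minimal sketch; A is assumed to be an assembled MATMPIAIJ matrix), the local matrix can
   be created once and then refreshed after the numerical values of A change:
.vb
     Mat A_loc;

     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... change the values of A without changing its nonzero pattern ... */
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve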
5046 5047 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge() 5048 @*/ 5049 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5050 { 5051 PetscErrorCode ierr; 5052 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5053 Mat_SeqAIJ *mat,*a,*b; 5054 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5055 const PetscScalar *aa,*ba,*aav,*bav; 5056 PetscScalar *ca,*cam; 5057 PetscMPIInt size; 5058 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5059 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5060 PetscBool match; 5061 5062 PetscFunctionBegin; 5063 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5064 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5065 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 5066 if (size == 1) { 5067 if (scall == MAT_INITIAL_MATRIX) { 5068 ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr); 5069 *A_loc = mpimat->A; 5070 } else if (scall == MAT_REUSE_MATRIX) { 5071 ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5072 } 5073 PetscFunctionReturn(0); 5074 } 5075 5076 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5077 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5078 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5079 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5080 ierr = MatSeqAIJGetArrayRead(mpimat->A,&aav);CHKERRQ(ierr); 5081 ierr = MatSeqAIJGetArrayRead(mpimat->B,&bav);CHKERRQ(ierr); 5082 aa = aav; 5083 ba = bav; 5084 if (scall == MAT_INITIAL_MATRIX) { 5085 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5086 ci[0] = 0; 5087 for (i=0; i<am; i++) { 5088 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5089 } 5090 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5091 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5092 k = 0; 5093 for (i=0; i<am; i++) { 5094 ncols_o = bi[i+1] - bi[i]; 5095 ncols_d = ai[i+1] - ai[i]; 5096 /* off-diagonal portion of A */ 5097 for (jo=0; jo<ncols_o; jo++) { 5098 col = cmap[*bj]; 5099 if (col >= cstart) break; 5100 cj[k] = col; bj++; 5101 ca[k++] = *ba++; 5102 } 5103 /* diagonal portion of A */ 5104 for (j=0; j<ncols_d; j++) { 5105 cj[k] = cstart + *aj++; 5106 ca[k++] = *aa++; 5107 } 5108 /* off-diagonal portion of A */ 5109 for (j=jo; j<ncols_o; j++) { 5110 cj[k] = cmap[*bj++]; 5111 ca[k++] = *ba++; 5112 } 5113 } 5114 /* put together the new matrix */ 5115 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5116 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5117 /* Since these are PETSc arrays, change flags to free them as necessary. 
 */
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
#if defined(PETSC_HAVE_DEVICE)
    (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU;
#endif
    ci = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  ierr = MatSeqAIJRestoreArrayRead(mpimat->A,&aav);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(mpimat->B,&bav);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
     MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and off-diagonal parts

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameters:
+    glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL)
-    A_loc - the local sequential matrix generated

   Level: developer

   Notes:
   This is different from MatMPIAIJGetLocalMat() since the first columns in the returned matrix are those associated with the diagonal part, followed by those associated with the off-diagonal part (in its local ordering)

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed()

@*/
PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat            Ao,Ad;
  const PetscInt *cmap;
  PetscMPIInt    size;
  PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*);

  PetscFunctionBegin;
  ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
  if (size == 1) {
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = PetscObjectReference((PetscObject)Ad);CHKERRQ(ierr);
      *A_loc = Ad;
    } else if (scall == MAT_REUSE_MATRIX) {
      ierr = MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    if (glob) { ierr = ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob);CHKERRQ(ierr); }
    PetscFunctionReturn(0);
  }
  ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(A,scall,glob,A_loc);CHKERRQ(ierr);
  } else {
    Mat_SeqAIJ        *a = (Mat_SeqAIJ*)Ad->data;
    Mat_SeqAIJ        *b = (Mat_SeqAIJ*)Ao->data;
    Mat_SeqAIJ        *c;
    PetscInt          *ai = a->i, *aj = a->j;
    PetscInt          *bi = b->i, *bj = b->j;
    PetscInt          *ci,*cj;
    const PetscScalar *aa,*ba;
    PetscScalar       *ca;
    PetscInt          i,j,am,dn,on;

    ierr = MatGetLocalSize(Ad,&am,&dn);CHKERRQ(ierr);
    ierr = MatGetLocalSize(Ao,NULL,&on);CHKERRQ(ierr);
    ierr = MatSeqAIJGetArrayRead(Ad,&aa);CHKERRQ(ierr);
    ierr = MatSeqAIJGetArrayRead(Ao,&ba);CHKERRQ(ierr);
    if (scall == MAT_INITIAL_MATRIX) {
      PetscInt k;
      ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
      ierr = PetscMalloc1(ai[am]+bi[am],&cj);CHKERRQ(ierr);
      ierr = PetscMalloc1(ai[am]+bi[am],&ca);CHKERRQ(ierr);
      ci[0] = 0;
      for (i=0,k=0; i<am; i++) {
        const PetscInt ncols_o = bi[i+1] - bi[i];
        const PetscInt ncols_d = ai[i+1] - ai[i];
        ci[i+1] = ci[i] + ncols_o + ncols_d;
        /* diagonal portion of A */
        for (j=0; j<ncols_d; j++,k++) {
          cj[k] = *aj++;
          ca[k] = *aa++;
        }
        /* off-diagonal portion of A */
        for (j=0; j<ncols_o; j++,k++) {
          cj[k] = dn + *bj++;
          ca[k] = *ba++;
        }
      }
      /* put together the new matrix */
      ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc);CHKERRQ(ierr);
      /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
      /* Since these are PETSc arrays, change flags to free them as necessary. */
      c          = (Mat_SeqAIJ*)(*A_loc)->data;
      c->free_a  = PETSC_TRUE;
      c->free_ij = PETSC_TRUE;
      c->nonew   = 0;
      ierr = MatSetType(*A_loc,((PetscObject)Ad)->type_name);CHKERRQ(ierr);
    } else if (scall == MAT_REUSE_MATRIX) {
#if defined(PETSC_HAVE_DEVICE)
      (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU;
#endif
      c  = (Mat_SeqAIJ*)(*A_loc)->data;
      ca = c->a;
      for (i=0; i<am; i++) {
        const PetscInt ncols_d = ai[i+1] - ai[i];
        const PetscInt ncols_o = bi[i+1] - bi[i];
        /* diagonal portion of A */
        for (j=0; j<ncols_d; j++) *ca++ = *aa++;
        /* off-diagonal portion of A */
        for (j=0; j<ncols_o; j++) *ca++ = *ba++;
      }
    } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
    ierr = MatSeqAIJRestoreArrayRead(Ad,&aa);CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArrayRead(Ao,&ba);CHKERRQ(ierr);
    if (glob) {
      PetscInt cst, *gidx;

      ierr = MatGetOwnershipRangeColumn(A,&cst,NULL);CHKERRQ(ierr);
      ierr = PetscMalloc1(dn+on,&gidx);CHKERRQ(ierr);
      for (i=0; i<dn; i++) gidx[i]    = cst + i;
      for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
      ierr = ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob);CHKERRQ(ierr);
    }
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or NULL)

   Output Parameter:
.
A_loc - the local sequential matrix generated 5286 5287 Level: developer 5288 5289 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5290 5291 @*/ 5292 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5293 { 5294 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5295 PetscErrorCode ierr; 5296 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5297 IS isrowa,iscola; 5298 Mat *aloc; 5299 PetscBool match; 5300 5301 PetscFunctionBegin; 5302 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5303 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5304 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5305 if (!row) { 5306 start = A->rmap->rstart; end = A->rmap->rend; 5307 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5308 } else { 5309 isrowa = *row; 5310 } 5311 if (!col) { 5312 start = A->cmap->rstart; 5313 cmap = a->garray; 5314 nzA = a->A->cmap->n; 5315 nzB = a->B->cmap->n; 5316 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5317 ncols = 0; 5318 for (i=0; i<nzB; i++) { 5319 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5320 else break; 5321 } 5322 imark = i; 5323 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5324 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5325 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5326 } else { 5327 iscola = *col; 5328 } 5329 if (scall != MAT_INITIAL_MATRIX) { 5330 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5331 aloc[0] = *A_loc; 5332 } 5333 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5334 if (!col) { /* attach global id of condensed columns */ 5335 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5336 } 5337 *A_loc = aloc[0]; 5338 ierr = PetscFree(aloc);CHKERRQ(ierr); 5339 if (!row) { 5340 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5341 } 5342 if (!col) { 5343 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5344 } 5345 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5346 PetscFunctionReturn(0); 5347 } 5348 5349 /* 5350 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5351 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5352 * on a global size. 
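 * The implementation builds two PetscSF graphs, one over the diagonal block of P and one over its
 * off-diagonal block, and uses them to pull the requested rows from their owning ranks, so only
 * buffers proportional to the local data are allocated.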
5353 * */ 5354 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5355 { 5356 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5357 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5358 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5359 PetscMPIInt owner; 5360 PetscSFNode *iremote,*oiremote; 5361 const PetscInt *lrowindices; 5362 PetscErrorCode ierr; 5363 PetscSF sf,osf; 5364 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5365 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5366 MPI_Comm comm; 5367 ISLocalToGlobalMapping mapping; 5368 5369 PetscFunctionBegin; 5370 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr); 5371 /* plocalsize is the number of roots 5372 * nrows is the number of leaves 5373 * */ 5374 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr); 5375 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr); 5376 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr); 5377 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr); 5378 for (i=0;i<nrows;i++) { 5379 /* Find a remote index and an owner for a row 5380 * The row could be local or remote 5381 * */ 5382 owner = 0; 5383 lidx = 0; 5384 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr); 5385 iremote[i].index = lidx; 5386 iremote[i].rank = owner; 5387 } 5388 /* Create SF to communicate how many nonzero columns for each row */ 5389 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5390 /* SF will figure out the number of nonzero colunms for each row, and their 5391 * offsets 5392 * */ 5393 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5394 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5395 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5396 5397 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); 5398 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); 5399 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); 5400 roffsets[0] = 0; 5401 roffsets[1] = 0; 5402 for (i=0;i<plocalsize;i++) { 5403 /* diag */ 5404 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5405 /* off diag */ 5406 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5407 /* compute offsets so that we relative location for each row */ 5408 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5409 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5410 } 5411 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr); 5412 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr); 5413 /* 'r' means root, and 'l' means leaf */ 5414 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr); 5415 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr); 5416 ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr); 5417 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr); 5418 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5419 ierr = PetscFree(roffsets);CHKERRQ(ierr); 5420 ierr = PetscFree(nrcols);CHKERRQ(ierr); 5421 dntotalcols = 0; 5422 ontotalcols = 0; 5423 ncol = 0; 5424 for (i=0;i<nrows;i++) { 5425 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5426 ncol = PetscMax(pnnz[i],ncol); 5427 /* diag */ 5428 dntotalcols += nlcols[i*2+0]; 5429 /* off diag */ 5430 ontotalcols += nlcols[i*2+1]; 5431 } 5432 /* We do not need to figure the right number of columns 5433 * since all the calculations will be done by going through the raw data 5434 * */ 5435 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr); 5436 ierr = 
MatSetUp(*P_oth);CHKERRQ(ierr); 5437 ierr = PetscFree(pnnz);CHKERRQ(ierr); 5438 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5439 /* diag */ 5440 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr); 5441 /* off diag */ 5442 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr); 5443 /* diag */ 5444 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr); 5445 /* off diag */ 5446 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr); 5447 dntotalcols = 0; 5448 ontotalcols = 0; 5449 ntotalcols = 0; 5450 for (i=0;i<nrows;i++) { 5451 owner = 0; 5452 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr); 5453 /* Set iremote for diag matrix */ 5454 for (j=0;j<nlcols[i*2+0];j++) { 5455 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5456 iremote[dntotalcols].rank = owner; 5457 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5458 ilocal[dntotalcols++] = ntotalcols++; 5459 } 5460 /* off diag */ 5461 for (j=0;j<nlcols[i*2+1];j++) { 5462 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5463 oiremote[ontotalcols].rank = owner; 5464 oilocal[ontotalcols++] = ntotalcols++; 5465 } 5466 } 5467 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr); 5468 ierr = PetscFree(loffsets);CHKERRQ(ierr); 5469 ierr = PetscFree(nlcols);CHKERRQ(ierr); 5470 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5471 /* P serves as roots and P_oth is leaves 5472 * Diag matrix 5473 * */ 5474 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5475 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5476 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5477 5478 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr); 5479 /* Off diag */ 5480 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5481 ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr); 5482 ierr = PetscSFSetUp(osf);CHKERRQ(ierr); 5483 /* We operate on the matrix internal data for saving memory */ 5484 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5485 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5486 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr); 5487 /* Convert to global indices for diag matrix */ 5488 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5489 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5490 /* We want P_oth store global indices */ 5491 ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr); 5492 /* Use memory scalable approach */ 5493 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr); 5494 ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); 5495 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5496 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5497 /* Convert back to local indices */ 5498 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5499 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5500 nout = 0; 5501 ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr); 5502 if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout); 5503 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 
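  /* The scalar broadcasts below were started with PetscSFBcastBegin() before the index translation above,
     so communication of the numerical values overlaps with the local-to-global conversion work; the
     matching PetscSFBcastEnd() calls complete that exchange. */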
5504 /* Exchange values */ 5505 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5506 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5507 /* Stop PETSc from shrinking memory */ 5508 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5509 ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5510 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5511 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5512 ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5513 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5514 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5515 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5516 PetscFunctionReturn(0); 5517 } 5518 5519 /* 5520 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5521 * This supports MPIAIJ and MAIJ 5522 * */ 5523 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5524 { 5525 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5526 Mat_SeqAIJ *p_oth; 5527 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 5528 IS rows,map; 5529 PetscHMapI hamp; 5530 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5531 MPI_Comm comm; 5532 PetscSF sf,osf; 5533 PetscBool has; 5534 PetscErrorCode ierr; 5535 5536 PetscFunctionBegin; 5537 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5538 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5539 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5540 * and then create a submatrix (that often is an overlapping matrix) 5541 * */ 5542 if (reuse == MAT_INITIAL_MATRIX) { 5543 /* Use a hash table to figure out unique keys */ 5544 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5545 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5546 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5547 count = 0; 5548 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5549 for (i=0;i<a->B->cmap->n;i++) { 5550 key = a->garray[i]/dof; 5551 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5552 if (!has) { 5553 mapping[i] = count; 5554 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5555 } else { 5556 /* Current 'i' has the same value the previous step */ 5557 mapping[i] = count-1; 5558 } 5559 } 5560 ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5561 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5562 if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count); 5563 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5564 off = 0; 5565 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5566 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5567 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5568 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5569 /* In case, the matrix was already created but users want to recreate the matrix */ 5570 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5571 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5572 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5573 ierr = ISDestroy(&map);CHKERRQ(ierr); 5574 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5575 } else if 
(reuse == MAT_REUSE_MATRIX) { 5576 /* If matrix was already created, we simply update values using SF objects 5577 * that as attached to the matrix ealier. 5578 * */ 5579 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5580 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5581 if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet"); 5582 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5583 /* Update values in place */ 5584 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5585 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5586 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5587 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5588 } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type"); 5589 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5590 PetscFunctionReturn(0); 5591 } 5592 5593 /*@C 5594 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5595 5596 Collective on Mat 5597 5598 Input Parameters: 5599 + A - the first matrix in mpiaij format 5600 . B - the second matrix in mpiaij format 5601 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5602 5603 Input/Output Parameters: 5604 + rowb - index sets of rows of B to extract (or NULL), modified on output 5605 - colb - index sets of columns of B to extract (or NULL), modified on output 5606 5607 Output Parameter: 5608 . B_seq - the sequential matrix generated 5609 5610 Level: developer 5611 5612 @*/ 5613 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5614 { 5615 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5616 PetscErrorCode ierr; 5617 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5618 IS isrowb,iscolb; 5619 Mat *bseq=NULL; 5620 5621 PetscFunctionBegin; 5622 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5623 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5624 } 5625 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5626 5627 if (scall == MAT_INITIAL_MATRIX) { 5628 start = A->cmap->rstart; 5629 cmap = a->garray; 5630 nzA = a->A->cmap->n; 5631 nzB = a->B->cmap->n; 5632 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5633 ncols = 0; 5634 for (i=0; i<nzB; i++) { /* row < local row index */ 5635 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5636 else break; 5637 } 5638 imark = i; 5639 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5640 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5641 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5642 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5643 } else { 5644 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5645 isrowb = *rowb; iscolb = *colb; 5646 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5647 bseq[0] = *B_seq; 5648 } 5649 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5650 *B_seq = bseq[0]; 5651 ierr = PetscFree(bseq);CHKERRQ(ierr); 5652 if (!rowb) { 5653 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5654 } else { 5655 *rowb = isrowb; 5656 } 5657 if (!colb) 
{ 5658 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5659 } else { 5660 *colb = iscolb; 5661 } 5662 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5663 PetscFunctionReturn(0); 5664 } 5665 5666 /* 5667 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5668 of the OFF-DIAGONAL portion of local A 5669 5670 Collective on Mat 5671 5672 Input Parameters: 5673 + A,B - the matrices in mpiaij format 5674 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5675 5676 Output Parameter: 5677 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5678 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5679 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5680 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5681 5682 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5683 for this matrix. This is not desirable.. 5684 5685 Level: developer 5686 5687 */ 5688 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5689 { 5690 PetscErrorCode ierr; 5691 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5692 Mat_SeqAIJ *b_oth; 5693 VecScatter ctx; 5694 MPI_Comm comm; 5695 const PetscMPIInt *rprocs,*sprocs; 5696 const PetscInt *srow,*rstarts,*sstarts; 5697 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5698 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5699 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5700 MPI_Request *reqs = NULL,*rwaits = NULL,*swaits = NULL; 5701 PetscMPIInt size,tag,rank,nreqs; 5702 5703 PetscFunctionBegin; 5704 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5705 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 5706 5707 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5708 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5709 } 5710 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5711 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 5712 5713 if (size == 1) { 5714 startsj_s = NULL; 5715 bufa_ptr = NULL; 5716 *B_oth = NULL; 5717 PetscFunctionReturn(0); 5718 } 5719 5720 ctx = a->Mvctx; 5721 tag = ((PetscObject)ctx)->tag; 5722 5723 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5724 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5725 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5726 ierr = PetscMPIIntCast(nsends+nrecvs,&nreqs);CHKERRQ(ierr); 5727 ierr = PetscMalloc1(nreqs,&reqs);CHKERRQ(ierr); 5728 rwaits = reqs; 5729 swaits = reqs + nrecvs; 5730 5731 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5732 if (scall == MAT_INITIAL_MATRIX) { 5733 /* i-array */ 5734 /*---------*/ 5735 /* post receives */ 5736 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5737 for (i=0; i<nrecvs; i++) { 5738 rowlen = rvalues + rstarts[i]*rbs; 5739 nrows = 
(rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5740 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5741 } 5742 5743 /* pack the outgoing message */ 5744 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5745 5746 sstartsj[0] = 0; 5747 rstartsj[0] = 0; 5748 len = 0; /* total length of j or a array to be sent */ 5749 if (nsends) { 5750 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5751 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5752 } 5753 for (i=0; i<nsends; i++) { 5754 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5755 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5756 for (j=0; j<nrows; j++) { 5757 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5758 for (l=0; l<sbs; l++) { 5759 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5760 5761 rowlen[j*sbs+l] = ncols; 5762 5763 len += ncols; 5764 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5765 } 5766 k++; 5767 } 5768 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5769 5770 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5771 } 5772 /* recvs and sends of i-array are completed */ 5773 if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5774 ierr = PetscFree(svalues);CHKERRQ(ierr); 5775 5776 /* allocate buffers for sending j and a arrays */ 5777 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5778 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5779 5780 /* create i-array of B_oth */ 5781 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5782 5783 b_othi[0] = 0; 5784 len = 0; /* total length of j or a array to be received */ 5785 k = 0; 5786 for (i=0; i<nrecvs; i++) { 5787 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5788 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5789 for (j=0; j<nrows; j++) { 5790 b_othi[k+1] = b_othi[k] + rowlen[j]; 5791 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5792 k++; 5793 } 5794 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5795 } 5796 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5797 5798 /* allocate space for j and a arrrays of B_oth */ 5799 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5800 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5801 5802 /* j-array */ 5803 /*---------*/ 5804 /* post receives of j-array */ 5805 for (i=0; i<nrecvs; i++) { 5806 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5807 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5808 } 5809 5810 /* pack the outgoing message j-array */ 5811 if (nsends) k = sstarts[0]; 5812 for (i=0; i<nsends; i++) { 5813 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5814 bufJ = bufj+sstartsj[i]; 5815 for (j=0; j<nrows; j++) { 5816 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5817 for (ll=0; ll<sbs; ll++) { 5818 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5819 for (l=0; l<ncols; l++) { 5820 *bufJ++ = cols[l]; 5821 } 5822 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5823 } 5824 } 5825 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5826 } 5827 5828 /* recvs and sends of j-array are completed */ 5829 if (nreqs) {ierr = 
MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5830 } else if (scall == MAT_REUSE_MATRIX) { 5831 sstartsj = *startsj_s; 5832 rstartsj = *startsj_r; 5833 bufa = *bufa_ptr; 5834 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5835 b_otha = b_oth->a; 5836 #if defined(PETSC_HAVE_DEVICE) 5837 (*B_oth)->offloadmask = PETSC_OFFLOAD_CPU; 5838 #endif 5839 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5840 5841 /* a-array */ 5842 /*---------*/ 5843 /* post receives of a-array */ 5844 for (i=0; i<nrecvs; i++) { 5845 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5846 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5847 } 5848 5849 /* pack the outgoing message a-array */ 5850 if (nsends) k = sstarts[0]; 5851 for (i=0; i<nsends; i++) { 5852 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5853 bufA = bufa+sstartsj[i]; 5854 for (j=0; j<nrows; j++) { 5855 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5856 for (ll=0; ll<sbs; ll++) { 5857 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5858 for (l=0; l<ncols; l++) { 5859 *bufA++ = vals[l]; 5860 } 5861 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5862 } 5863 } 5864 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5865 } 5866 /* recvs and sends of a-array are completed */ 5867 if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5868 ierr = PetscFree(reqs);CHKERRQ(ierr); 5869 5870 if (scall == MAT_INITIAL_MATRIX) { 5871 /* put together the new matrix */ 5872 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5873 5874 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5875 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5876 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5877 b_oth->free_a = PETSC_TRUE; 5878 b_oth->free_ij = PETSC_TRUE; 5879 b_oth->nonew = 0; 5880 5881 ierr = PetscFree(bufj);CHKERRQ(ierr); 5882 if (!startsj_s || !bufa_ptr) { 5883 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5884 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5885 } else { 5886 *startsj_s = sstartsj; 5887 *startsj_r = rstartsj; 5888 *bufa_ptr = bufa; 5889 } 5890 } 5891 5892 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5893 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5894 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5895 PetscFunctionReturn(0); 5896 } 5897 5898 /*@C 5899 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5900 5901 Not Collective 5902 5903 Input Parameter: 5904 . A - The matrix in mpiaij format 5905 5906 Output Parameters: 5907 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5908 . 
colmap - A map from global column index to local index into lvec 5909 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5910 5911 Level: developer 5912 5913 @*/ 5914 #if defined(PETSC_USE_CTABLE) 5915 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5916 #else 5917 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5918 #endif 5919 { 5920 Mat_MPIAIJ *a; 5921 5922 PetscFunctionBegin; 5923 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5924 PetscValidPointer(lvec, 2); 5925 PetscValidPointer(colmap, 3); 5926 PetscValidPointer(multScatter, 4); 5927 a = (Mat_MPIAIJ*) A->data; 5928 if (lvec) *lvec = a->lvec; 5929 if (colmap) *colmap = a->colmap; 5930 if (multScatter) *multScatter = a->Mvctx; 5931 PetscFunctionReturn(0); 5932 } 5933 5934 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5935 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5936 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5937 #if defined(PETSC_HAVE_MKL_SPARSE) 5938 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5939 #endif 5940 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5941 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5942 #if defined(PETSC_HAVE_ELEMENTAL) 5943 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5944 #endif 5945 #if defined(PETSC_HAVE_SCALAPACK) 5946 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 5947 #endif 5948 #if defined(PETSC_HAVE_HYPRE) 5949 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5950 #endif 5951 #if defined(PETSC_HAVE_CUDA) 5952 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 5953 #endif 5954 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 5955 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*); 5956 #endif 5957 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5958 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5959 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 5960 5961 /* 5962 Computes (B'*A')' since computing B*A directly is untenable 5963 5964 n p p 5965 [ ] [ ] [ ] 5966 m [ A ] * n [ B ] = m [ C ] 5967 [ ] [ ] [ ] 5968 5969 */ 5970 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5971 { 5972 PetscErrorCode ierr; 5973 Mat At,Bt,Ct; 5974 5975 PetscFunctionBegin; 5976 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5977 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5978 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr); 5979 ierr = MatDestroy(&At);CHKERRQ(ierr); 5980 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5981 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5982 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5983 PetscFunctionReturn(0); 5984 } 5985 5986 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 5987 { 5988 PetscErrorCode ierr; 5989 PetscBool cisdense; 5990 5991 PetscFunctionBegin; 5992 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5993 ierr = 
MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
5994 ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
5995 ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
5996 if (!cisdense) {
5997 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
5998 }
5999 ierr = MatSetUp(C);CHKERRQ(ierr);
6000
6001 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6002 PetscFunctionReturn(0);
6003 }
6004
6005 /* ----------------------------------------------------------------*/
6006 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6007 {
6008 Mat_Product *product = C->product;
6009 Mat A = product->A,B=product->B;
6010
6011 PetscFunctionBegin;
6012 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
6013 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
6014
6015 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6016 C->ops->productsymbolic = MatProductSymbolic_AB;
6017 PetscFunctionReturn(0);
6018 }
6019
6020 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6021 {
6022 PetscErrorCode ierr;
6023 Mat_Product *product = C->product;
6024
6025 PetscFunctionBegin;
6026 if (product->type == MATPRODUCT_AB) {
6027 ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
6028 }
6029 PetscFunctionReturn(0);
6030 }
6031 /* ----------------------------------------------------------------*/
6032
6033 /*MC
6034 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6035
6036 Options Database Keys:
6037 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6038
6039 Level: beginner
6040
6041 Notes:
6042 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
6043 in this case the values associated with the rows and columns one passes in are set to zero
6044 in the matrix
6045
6046 MatSetOption(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
6047 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
6048
6049 .seealso: MatCreateAIJ()
6050 M*/
6051
6052 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6053 {
6054 Mat_MPIAIJ *b;
6055 PetscErrorCode ierr;
6056 PetscMPIInt size;
6057
6058 PetscFunctionBegin;
6059 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
6060
6061 ierr = PetscNewLog(B,&b);CHKERRQ(ierr);
6062 B->data = (void*)b;
6063 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
6064 B->assembled = PETSC_FALSE;
6065 B->insertmode = NOT_SET_VALUES;
6066 b->size = size;
6067
6068 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr);
6069
6070 /* build cache for off array entries formed */
6071 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
6072
6073 b->donotstash = PETSC_FALSE;
6074 b->colmap = NULL;
6075 b->garray = NULL;
6076 b->roworiented = PETSC_TRUE;
6077
6078 /* stuff used for matrix vector multiply */
6079 b->lvec = NULL;
6080 b->Mvctx = NULL;
6081
6082 /* stuff for MatGetRow() */
6083 b->rowindices = NULL;
6084 b->rowvalues = NULL;
6085 b->getrowactive = PETSC_FALSE;
6086
6087 /* flexible pointer used in CUSPARSE classes */
6088 b->spptr = NULL;
6089
6090 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
6091 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
6092 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
6093 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
6094 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
6095 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
6096 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
6097 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
6098 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
6099 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
6100 #if defined(PETSC_HAVE_CUDA)
6101 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr);
6102 #endif
6103 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6104 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr);
6105 #endif
6106 #if defined(PETSC_HAVE_MKL_SPARSE)
6107 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
6108 #endif
6109 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
6110 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
6111 ierr =
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 6112 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);CHKERRQ(ierr); 6113 #if defined(PETSC_HAVE_ELEMENTAL) 6114 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 6115 #endif 6116 #if defined(PETSC_HAVE_SCALAPACK) 6117 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr); 6118 #endif 6119 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 6120 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 6121 #if defined(PETSC_HAVE_HYPRE) 6122 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 6123 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr); 6124 #endif 6125 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr); 6126 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr); 6127 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 6128 PetscFunctionReturn(0); 6129 } 6130 6131 /*@C 6132 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 6133 and "off-diagonal" part of the matrix in CSR format. 6134 6135 Collective 6136 6137 Input Parameters: 6138 + comm - MPI communicator 6139 . m - number of local rows (Cannot be PETSC_DECIDE) 6140 . n - This value should be the same as the local size used in creating the 6141 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 6142 calculated if N is given) For square matrices n is almost always m. 6143 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 6144 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 6145 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6146 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6147 . a - matrix values 6148 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6149 . oj - column indices, which must be global, representing global columns in the MPIAIJ matrix 6150 - oa - matrix values 6151 6152 Output Parameter: 6153 . mat - the matrix 6154 6155 Level: advanced 6156 6157 Notes: 6158 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6159 must free the arrays once the matrix has been destroyed and not before. 6160 6161 The i and j indices are 0 based 6162 6163 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6164 6165 This sets local rows and cannot be used to set off-processor values. 6166 6167 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6168 legacy application natively assembles into exactly this split format. 
The code to do so is nontrivial and does 6169 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6170 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6171 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6172 communication if it is known that only local entries will be set. 6173 6174 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 6175 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 6176 @*/ 6177 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 6178 { 6179 PetscErrorCode ierr; 6180 Mat_MPIAIJ *maij; 6181 6182 PetscFunctionBegin; 6183 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6184 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 6185 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 6186 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 6187 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 6188 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 6189 maij = (Mat_MPIAIJ*) (*mat)->data; 6190 6191 (*mat)->preallocated = PETSC_TRUE; 6192 6193 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 6194 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 6195 6196 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 6197 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 6198 6199 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 6200 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6201 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6202 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 6203 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6204 PetscFunctionReturn(0); 6205 } 6206 6207 /* 6208 Special version for direct calls from Fortran 6209 */ 6210 #include <petsc/private/fortranimpl.h> 6211 6212 /* Change these macros so can be used in void function */ 6213 #undef CHKERRQ 6214 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 6215 #undef SETERRQ2 6216 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 6217 #undef SETERRQ3 6218 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 6219 #undef SETERRQ 6220 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 6221 6222 #if defined(PETSC_HAVE_FORTRAN_CAPS) 6223 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 6224 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 6225 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 6226 #else 6227 #endif 6228 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 6229 { 6230 Mat mat = *mmat; 6231 PetscInt m = *mm, n = *mn; 6232 InsertMode addv = *maddv; 6233 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 6234 PetscScalar value; 6235 PetscErrorCode ierr; 6236 6237 MatCheckPreallocated(mat,1); 6238 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 6239 else if (mat->insertmode != addv) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 6240 { 6241 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 6242 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 6243 PetscBool roworiented = aij->roworiented; 6244 6245 /* Some Variables required in the macro */ 6246 Mat A = aij->A; 6247 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 6248 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 6249 MatScalar *aa = a->a; 6250 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 6251 Mat B = aij->B; 6252 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 6253 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 6254 MatScalar *ba = b->a; 6255 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 6256 * cannot use "#if defined" inside a macro. */ 6257 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 6258 6259 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 6260 PetscInt nonew = a->nonew; 6261 MatScalar *ap1,*ap2; 6262 6263 PetscFunctionBegin; 6264 for (i=0; i<m; i++) { 6265 if (im[i] < 0) continue; 6266 if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 6267 if (im[i] >= rstart && im[i] < rend) { 6268 row = im[i] - rstart; 6269 lastcol1 = -1; 6270 rp1 = aj + ai[row]; 6271 ap1 = aa + ai[row]; 6272 rmax1 = aimax[row]; 6273 nrow1 = ailen[row]; 6274 low1 = 0; 6275 high1 = nrow1; 6276 lastcol2 = -1; 6277 rp2 = bj + bi[row]; 6278 ap2 = ba + bi[row]; 6279 rmax2 = bimax[row]; 6280 nrow2 = bilen[row]; 6281 low2 = 0; 6282 high2 = nrow2; 6283 6284 for (j=0; j<n; j++) { 6285 if (roworiented) value = v[i*n+j]; 6286 else value = v[i+j*m]; 6287 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 6288 if (in[j] >= cstart && in[j] < cend) { 6289 col = in[j] - cstart; 6290 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 6291 #if defined(PETSC_HAVE_DEVICE) 6292 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 6293 #endif 6294 } else if (in[j] < 0) continue; 6295 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 6296 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */ 6297 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 6298 } else { 6299 if (mat->was_assembled) { 6300 if (!aij->colmap) { 6301 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 6302 } 6303 #if defined(PETSC_USE_CTABLE) 6304 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 6305 col--; 6306 #else 6307 col = aij->colmap[in[j]] - 1; 6308 #endif 6309 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 6310 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 6311 col = in[j]; 6312 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 6313 B = aij->B; 6314 b = (Mat_SeqAIJ*)B->data; 6315 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 6316 rp2 = bj + bi[row]; 6317 ap2 = ba + bi[row]; 6318 rmax2 = bimax[row]; 6319 nrow2 = bilen[row]; 6320 low2 = 0; 6321 high2 = nrow2; 6322 bm = aij->B->rmap->n; 6323 ba = b->a; 6324 inserted = PETSC_FALSE; 6325 } 6326 } else col = in[j]; 6327 
MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 6328 #if defined(PETSC_HAVE_DEVICE) 6329 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 6330 #endif 6331 } 6332 } 6333 } else if (!aij->donotstash) { 6334 if (roworiented) { 6335 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6336 } else { 6337 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6338 } 6339 } 6340 } 6341 } 6342 PetscFunctionReturnVoid(); 6343 } 6344 6345 typedef struct { 6346 Mat *mp; /* intermediate products */ 6347 PetscBool *mptmp; /* is the intermediate product temporary ? */ 6348 PetscInt cp; /* number of intermediate products */ 6349 6350 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6351 PetscInt *startsj_s,*startsj_r; 6352 PetscScalar *bufa; 6353 Mat P_oth; 6354 6355 /* may take advantage of merging product->B */ 6356 Mat Bloc; /* B-local by merging diag and off-diag */ 6357 6358 /* cusparse does not have support to split between symbolic and numeric phases. 6359 When api_user is true, we don't need to update the numerical values 6360 of the temporary storage */ 6361 PetscBool reusesym; 6362 6363 /* support for COO values insertion */ 6364 PetscScalar *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6365 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6366 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6367 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */ 6368 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6369 PetscMemType mtype; 6370 6371 /* customization */ 6372 PetscBool abmerge; 6373 PetscBool P_oth_bind; 6374 } MatMatMPIAIJBACKEND; 6375 6376 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6377 { 6378 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data; 6379 PetscInt i; 6380 PetscErrorCode ierr; 6381 6382 PetscFunctionBegin; 6383 ierr = PetscFree2(mmdata->startsj_s,mmdata->startsj_r);CHKERRQ(ierr); 6384 ierr = PetscFree(mmdata->bufa);CHKERRQ(ierr); 6385 ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v);CHKERRQ(ierr); 6386 ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w);CHKERRQ(ierr); 6387 ierr = MatDestroy(&mmdata->P_oth);CHKERRQ(ierr); 6388 ierr = MatDestroy(&mmdata->Bloc);CHKERRQ(ierr); 6389 ierr = PetscSFDestroy(&mmdata->sf);CHKERRQ(ierr); 6390 for (i = 0; i < mmdata->cp; i++) { 6391 ierr = MatDestroy(&mmdata->mp[i]);CHKERRQ(ierr); 6392 } 6393 ierr = PetscFree2(mmdata->mp,mmdata->mptmp);CHKERRQ(ierr); 6394 ierr = PetscFree(mmdata->own[0]);CHKERRQ(ierr); 6395 ierr = PetscFree(mmdata->own);CHKERRQ(ierr); 6396 ierr = PetscFree(mmdata->off[0]);CHKERRQ(ierr); 6397 ierr = PetscFree(mmdata->off);CHKERRQ(ierr); 6398 ierr = PetscFree(mmdata);CHKERRQ(ierr); 6399 PetscFunctionReturn(0); 6400 } 6401 6402 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 6403 { 6404 PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]); 6405 PetscErrorCode ierr; 6406 6407 PetscFunctionBegin; 6408 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f);CHKERRQ(ierr); 6409 if (f) { 6410 ierr = (*f)(A,n,idx,v);CHKERRQ(ierr); 6411 } else { 6412 const PetscScalar *vv; 6413 6414 ierr = 
MatSeqAIJGetArrayRead(A,&vv);CHKERRQ(ierr); 6415 if (n && idx) { 6416 PetscScalar *w = v; 6417 const PetscInt *oi = idx; 6418 PetscInt j; 6419 6420 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 6421 } else { 6422 ierr = PetscArraycpy(v,vv,n);CHKERRQ(ierr); 6423 } 6424 ierr = MatSeqAIJRestoreArrayRead(A,&vv);CHKERRQ(ierr); 6425 } 6426 PetscFunctionReturn(0); 6427 } 6428 6429 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 6430 { 6431 MatMatMPIAIJBACKEND *mmdata; 6432 PetscInt i,n_d,n_o; 6433 PetscErrorCode ierr; 6434 6435 PetscFunctionBegin; 6436 MatCheckProduct(C,1); 6437 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 6438 mmdata = (MatMatMPIAIJBACKEND*)C->product->data; 6439 if (!mmdata->reusesym) { /* update temporary matrices */ 6440 if (mmdata->P_oth) { 6441 ierr = MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 6442 } 6443 if (mmdata->Bloc) { 6444 ierr = MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc);CHKERRQ(ierr); 6445 } 6446 } 6447 mmdata->reusesym = PETSC_FALSE; 6448 6449 for (i = 0; i < mmdata->cp; i++) { 6450 if (!mmdata->mp[i]->ops->productnumeric) SETERRQ1(PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]); 6451 ierr = (*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]);CHKERRQ(ierr); 6452 } 6453 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 6454 PetscInt noff = mmdata->off[i+1] - mmdata->off[i]; 6455 6456 if (mmdata->mptmp[i]) continue; 6457 if (noff) { 6458 PetscInt nown = mmdata->own[i+1] - mmdata->own[i]; 6459 6460 ierr = MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o);CHKERRQ(ierr); 6461 ierr = MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d);CHKERRQ(ierr); 6462 n_o += noff; 6463 n_d += nown; 6464 } else { 6465 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data; 6466 6467 ierr = MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d);CHKERRQ(ierr); 6468 n_d += mm->nz; 6469 } 6470 } 6471 if (mmdata->hasoffproc) { /* offprocess insertion */ 6472 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr); 6473 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr); 6474 } 6475 ierr = MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES);CHKERRQ(ierr); 6476 PetscFunctionReturn(0); 6477 } 6478 6479 /* Support for Pt * A, A * P, or Pt * A * P */ 6480 #define MAX_NUMBER_INTERMEDIATE 4 6481 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 6482 { 6483 Mat_Product *product = C->product; 6484 Mat A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 6485 Mat_MPIAIJ *a,*p; 6486 MatMatMPIAIJBACKEND *mmdata; 6487 ISLocalToGlobalMapping P_oth_l2g = NULL; 6488 IS glob = NULL; 6489 const char *prefix; 6490 char pprefix[256]; 6491 const PetscInt *globidx,*P_oth_idx; 6492 PetscInt i,j,cp,m,n,M,N,ncoo,ncoo_d,ncoo_o,ncoo_oown,*coo_i,*coo_j; 6493 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. 
*/ 6494 /* type-0: consecutive, start from 0; type-1: consecutive with */ 6495 /* a base offset; type-2: sparse with a local to global map table */ 6496 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 6497 6498 MatProductType ptype; 6499 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk; 6500 PetscMPIInt size; 6501 PetscErrorCode ierr; 6502 6503 PetscFunctionBegin; 6504 MatCheckProduct(C,1); 6505 if (product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 6506 ptype = product->type; 6507 if (product->A->symmetric && ptype == MATPRODUCT_AtB) { 6508 ptype = MATPRODUCT_AB; 6509 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 6510 } 6511 switch (ptype) { 6512 case MATPRODUCT_AB: 6513 A = product->A; 6514 P = product->B; 6515 m = A->rmap->n; 6516 n = P->cmap->n; 6517 M = A->rmap->N; 6518 N = P->cmap->N; 6519 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 6520 break; 6521 case MATPRODUCT_AtB: 6522 P = product->A; 6523 A = product->B; 6524 m = P->cmap->n; 6525 n = A->cmap->n; 6526 M = P->cmap->N; 6527 N = A->cmap->N; 6528 hasoffproc = PETSC_TRUE; 6529 break; 6530 case MATPRODUCT_PtAP: 6531 A = product->A; 6532 P = product->B; 6533 m = P->cmap->n; 6534 n = P->cmap->n; 6535 M = P->cmap->N; 6536 N = P->cmap->N; 6537 hasoffproc = PETSC_TRUE; 6538 break; 6539 default: 6540 SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 6541 } 6542 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)C),&size);CHKERRMPI(ierr); 6543 if (size == 1) hasoffproc = PETSC_FALSE; 6544 6545 /* defaults */ 6546 for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) { 6547 mp[i] = NULL; 6548 mptmp[i] = PETSC_FALSE; 6549 rmapt[i] = -1; 6550 cmapt[i] = -1; 6551 rmapa[i] = NULL; 6552 cmapa[i] = NULL; 6553 } 6554 6555 /* customization */ 6556 ierr = PetscNew(&mmdata);CHKERRQ(ierr); 6557 mmdata->reusesym = product->api_user; 6558 if (ptype == MATPRODUCT_AB) { 6559 if (product->api_user) { 6560 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr); 6561 ierr = PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr); 6562 ierr = PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6563 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6564 } else { 6565 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr); 6566 ierr = PetscOptionsBool("-matproduct_ab_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr); 6567 ierr = PetscOptionsBool("-matproduct_ab_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6568 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6569 } 6570 } else if (ptype == MATPRODUCT_PtAP) { 6571 if (product->api_user) { 6572 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr); 6573 ierr = PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6574 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6575 } else { 6576 ierr = 
PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr); 6577 ierr = PetscOptionsBool("-matproduct_ptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6578 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6579 } 6580 } 6581 a = (Mat_MPIAIJ*)A->data; 6582 p = (Mat_MPIAIJ*)P->data; 6583 ierr = MatSetSizes(C,m,n,M,N);CHKERRQ(ierr); 6584 ierr = PetscLayoutSetUp(C->rmap);CHKERRQ(ierr); 6585 ierr = PetscLayoutSetUp(C->cmap);CHKERRQ(ierr); 6586 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 6587 ierr = MatGetOptionsPrefix(C,&prefix);CHKERRQ(ierr); 6588 6589 cp = 0; 6590 switch (ptype) { 6591 case MATPRODUCT_AB: /* A * P */ 6592 ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 6593 6594 /* A_diag * P_local (merged or not) */ 6595 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 6596 /* P is product->B */ 6597 ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 6598 ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6599 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6600 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6601 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6602 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6603 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6604 mp[cp]->product->api_user = product->api_user; 6605 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6606 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6607 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6608 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6609 rmapt[cp] = 1; 6610 cmapt[cp] = 2; 6611 cmapa[cp] = globidx; 6612 mptmp[cp] = PETSC_FALSE; 6613 cp++; 6614 } else { /* A_diag * P_diag and A_diag * P_off */ 6615 ierr = MatProductCreate(a->A,p->A,NULL,&mp[cp]);CHKERRQ(ierr); 6616 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6617 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6618 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6619 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6620 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6621 mp[cp]->product->api_user = product->api_user; 6622 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6623 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6624 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6625 rmapt[cp] = 1; 6626 cmapt[cp] = 1; 6627 mptmp[cp] = PETSC_FALSE; 6628 cp++; 6629 ierr = MatProductCreate(a->A,p->B,NULL,&mp[cp]);CHKERRQ(ierr); 6630 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6631 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6632 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6633 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6634 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6635 mp[cp]->product->api_user = product->api_user; 6636 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 
6637 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6638 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6639 rmapt[cp] = 1; 6640 cmapt[cp] = 2; 6641 cmapa[cp] = p->garray; 6642 mptmp[cp] = PETSC_FALSE; 6643 cp++; 6644 } 6645 6646 /* A_off * P_other */ 6647 if (mmdata->P_oth) { 6648 ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); /* make P_oth use local col ids */ 6649 ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 6650 ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr); 6651 ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr); 6652 ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr); 6653 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6654 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6655 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6656 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6657 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6658 mp[cp]->product->api_user = product->api_user; 6659 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6660 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6661 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6662 rmapt[cp] = 1; 6663 cmapt[cp] = 2; 6664 cmapa[cp] = P_oth_idx; 6665 mptmp[cp] = PETSC_FALSE; 6666 cp++; 6667 } 6668 break; 6669 6670 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 6671 /* A is product->B */ 6672 ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 6673 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 6674 ierr = MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6675 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6676 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6677 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6678 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6679 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6680 mp[cp]->product->api_user = product->api_user; 6681 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6682 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6683 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6684 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6685 rmapt[cp] = 2; 6686 rmapa[cp] = globidx; 6687 cmapt[cp] = 2; 6688 cmapa[cp] = globidx; 6689 mptmp[cp] = PETSC_FALSE; 6690 cp++; 6691 } else { 6692 ierr = MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6693 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6694 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6695 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6696 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6697 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6698 mp[cp]->product->api_user = product->api_user; 6699 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6700 if (!mp[cp]->ops->productsymbolic) 
SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6701 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6702 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6703 rmapt[cp] = 1; 6704 cmapt[cp] = 2; 6705 cmapa[cp] = globidx; 6706 mptmp[cp] = PETSC_FALSE; 6707 cp++; 6708 ierr = MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6709 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6710 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6711 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6712 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6713 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6714 mp[cp]->product->api_user = product->api_user; 6715 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6716 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6717 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6718 rmapt[cp] = 2; 6719 rmapa[cp] = p->garray; 6720 cmapt[cp] = 2; 6721 cmapa[cp] = globidx; 6722 mptmp[cp] = PETSC_FALSE; 6723 cp++; 6724 } 6725 break; 6726 case MATPRODUCT_PtAP: 6727 ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 6728 /* P is product->B */ 6729 ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 6730 ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6731 ierr = MatProductSetType(mp[cp],MATPRODUCT_PtAP);CHKERRQ(ierr); 6732 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6733 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6734 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6735 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6736 mp[cp]->product->api_user = product->api_user; 6737 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6738 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6739 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6740 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6741 rmapt[cp] = 2; 6742 rmapa[cp] = globidx; 6743 cmapt[cp] = 2; 6744 cmapa[cp] = globidx; 6745 mptmp[cp] = PETSC_FALSE; 6746 cp++; 6747 if (mmdata->P_oth) { 6748 ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); 6749 ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 6750 ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr); 6751 ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr); 6752 ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr); 6753 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6754 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6755 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6756 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6757 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6758 mp[cp]->product->api_user = product->api_user; 6759 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6760 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing 
symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6761 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6762 mptmp[cp] = PETSC_TRUE;
6763 cp++;
6764 ierr = MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]);CHKERRQ(ierr);
6765 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6766 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6767 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6768 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6769 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6770 mp[cp]->product->api_user = product->api_user;
6771 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6772 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6773 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6774 rmapt[cp] = 2;
6775 rmapa[cp] = globidx;
6776 cmapt[cp] = 2;
6777 cmapa[cp] = P_oth_idx;
6778 mptmp[cp] = PETSC_FALSE;
6779 cp++;
6780 }
6781 break;
6782 default:
6783 SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6784 }
6785 /* sanity check */
6786 if (size > 1) for (i = 0; i < cp; i++) if (rmapt[i] == 2 && !hasoffproc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %D",i);
6787
6788 ierr = PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp);CHKERRQ(ierr);
6789 for (i = 0; i < cp; i++) {
6790 mmdata->mp[i] = mp[i];
6791 mmdata->mptmp[i] = mptmp[i];
6792 }
6793 mmdata->cp = cp;
6794 C->product->data = mmdata;
6795 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND;
6796 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
6797
6798 /* memory type */
6799 mmdata->mtype = PETSC_MEMTYPE_HOST;
6800 ierr = PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");CHKERRQ(ierr);
6801 ierr = PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"");CHKERRQ(ierr);
6802 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
6803 // enable the line below when MatSeqAIJCopySubArray_SeqAIJKokkos is implemented
6804 //else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_DEVICE;
6805
6806 /* prepare coo coordinates for values insertion */
6807
6808 /* count total nonzeros of those intermediate seqaij Mats
6809 ncoo_d: # of nonzeros of matrices that do not have offproc entries
6810 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
6811 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
6812 */
6813 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
6814 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
6815 if (mptmp[cp]) continue;
6816 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
6817 const PetscInt *rmap = rmapa[cp];
6818 const PetscInt mr = mp[cp]->rmap->n;
6819 const PetscInt rs = C->rmap->rstart;
6820 const PetscInt re = C->rmap->rend;
6821 const PetscInt *ii = mm->i;
6822 for (i = 0; i < mr; i++) {
6823 const PetscInt gr = rmap[i];
6824 const PetscInt nz = ii[i+1] - ii[i];
6825 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
6826 else ncoo_oown += nz; /* this row is local */
6827 }
6828 } else ncoo_d += mm->nz;
6829 }
6830
6831 /*
6832 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
6833
6834 ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted to me by other procs.
6835
6836 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].
6837
6838 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
6839 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
6840 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
6841
6842 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
6843 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
6844 */
6845 ierr = PetscCalloc1(mmdata->cp+1,&mmdata->off);CHKERRQ(ierr); /* +1 to make a csr-like data structure */
6846 ierr = PetscCalloc1(mmdata->cp+1,&mmdata->own);CHKERRQ(ierr);
6847
6848 /* gather (i,j) of nonzeros inserted by remote procs */
6849 if (hasoffproc) {
6850 PetscSF msf;
6851 PetscInt ncoo2,*coo_i2,*coo_j2;
6852
6853 ierr = PetscMalloc1(ncoo_o,&mmdata->off[0]);CHKERRQ(ierr);
6854 ierr = PetscMalloc1(ncoo_oown,&mmdata->own[0]);CHKERRQ(ierr);
6855 ierr = PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j);CHKERRQ(ierr); /* to collect (i,j) of entries to be sent to others */
6856
6857 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
6858 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
6859 PetscInt *idxoff = mmdata->off[cp];
6860 PetscInt *idxown = mmdata->own[cp];
6861 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
6862 const PetscInt *rmap = rmapa[cp];
6863 const PetscInt *cmap = cmapa[cp];
6864 const PetscInt *ii = mm->i;
6865 PetscInt *coi = coo_i + ncoo_o;
6866 PetscInt *coj = coo_j + ncoo_o;
6867 const PetscInt mr = mp[cp]->rmap->n;
6868 const PetscInt rs = C->rmap->rstart;
6869 const PetscInt re = C->rmap->rend;
6870 const PetscInt cs = C->cmap->rstart;
6871 for (i = 0; i < mr; i++) {
6872 const PetscInt *jj = mm->j + ii[i];
6873 const PetscInt gr = rmap[i];
6874 const PetscInt nz = ii[i+1] - ii[i];
6875 if (gr < rs || gr >= re) { /* this is an offproc row */
6876 for (j = ii[i]; j < ii[i+1]; j++) {
6877 *coi++ = gr;
6878 *idxoff++ = j;
6879 }
6880 if (!cmapt[cp]) { /* already global */
6881 for (j = 0; j < nz; j++) *coj++ = jj[j];
6882 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
6883 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
6884 } else { /* offdiag */
6885 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
6886 }
6887 ncoo_o += nz;
6888 } else { /* this is a local row */
6889 for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
6890 }
6891 }
6892 }
6893 mmdata->off[cp + 1] = idxoff;
6894 mmdata->own[cp + 1] = idxown;
6895 }
6896
6897 ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
6898 ierr = PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i);CHKERRQ(ierr);
6899 ierr = PetscSFGetMultiSF(mmdata->sf,&msf);CHKERRQ(ierr);
6900 ierr = PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL);CHKERRQ(ierr);
6901 ncoo = ncoo_d + ncoo_oown + ncoo2;
6902 ierr = PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2);CHKERRQ(ierr);
6903 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); /* put (i,j) of remote nonzeros at back */
6904 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6905 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d +
ncoo_oown);CHKERRQ(ierr); 6906 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 6907 ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr); 6908 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 6909 ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w);CHKERRQ(ierr); 6910 coo_i = coo_i2; 6911 coo_j = coo_j2; 6912 } else { /* no offproc values insertion */ 6913 ncoo = ncoo_d; 6914 ierr = PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j);CHKERRQ(ierr); 6915 6916 ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr); 6917 ierr = PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER);CHKERRQ(ierr); 6918 ierr = PetscSFSetUp(mmdata->sf);CHKERRQ(ierr); 6919 } 6920 mmdata->hasoffproc = hasoffproc; 6921 6922 /* gather (i,j) of nonzeros inserted locally */ 6923 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 6924 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 6925 PetscInt *coi = coo_i + ncoo_d; 6926 PetscInt *coj = coo_j + ncoo_d; 6927 const PetscInt *jj = mm->j; 6928 const PetscInt *ii = mm->i; 6929 const PetscInt *cmap = cmapa[cp]; 6930 const PetscInt *rmap = rmapa[cp]; 6931 const PetscInt mr = mp[cp]->rmap->n; 6932 const PetscInt rs = C->rmap->rstart; 6933 const PetscInt re = C->rmap->rend; 6934 const PetscInt cs = C->cmap->rstart; 6935 6936 if (mptmp[cp]) continue; 6937 if (rmapt[cp] == 1) { /* consecutive rows */ 6938 /* fill coo_i */ 6939 for (i = 0; i < mr; i++) { 6940 const PetscInt gr = i + rs; 6941 for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr; 6942 } 6943 /* fill coo_j */ 6944 if (!cmapt[cp]) { /* type-0, already global */ 6945 ierr = PetscArraycpy(coj,jj,mm->nz);CHKERRQ(ierr); 6946 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 6947 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 6948 } else { /* type-2, local to global for sparse columns */ 6949 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 6950 } 6951 ncoo_d += mm->nz; 6952 } else if (rmapt[cp] == 2) { /* sparse rows */ 6953 for (i = 0; i < mr; i++) { 6954 const PetscInt *jj = mm->j + ii[i]; 6955 const PetscInt gr = rmap[i]; 6956 const PetscInt nz = ii[i+1] - ii[i]; 6957 if (gr >= rs && gr < re) { /* local rows */ 6958 for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr; 6959 if (!cmapt[cp]) { /* type-0, already global */ 6960 for (j = 0; j < nz; j++) *coj++ = jj[j]; 6961 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 6962 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 6963 } else { /* type-2, local to global for sparse columns */ 6964 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 6965 } 6966 ncoo_d += nz; 6967 } 6968 } 6969 } 6970 } 6971 if (glob) { 6972 ierr = ISRestoreIndices(glob,&globidx);CHKERRQ(ierr); 6973 } 6974 ierr = ISDestroy(&glob);CHKERRQ(ierr); 6975 if (P_oth_l2g) { 6976 ierr = ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 6977 } 6978 ierr = ISLocalToGlobalMappingDestroy(&P_oth_l2g);CHKERRQ(ierr); 6979 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 6980 ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v);CHKERRQ(ierr); 6981 6982 /* preallocate with COO data */ 6983 ierr = MatSetPreallocationCOO(C,ncoo,coo_i,coo_j);CHKERRQ(ierr); 6984 ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr); 6985 PetscFunctionReturn(0); 6986 } 6987 6988 PetscErrorCode 
MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 6989 { 6990 Mat_Product *product = mat->product; 6991 PetscErrorCode ierr; 6992 #if defined(PETSC_HAVE_DEVICE) 6993 PetscBool match = PETSC_FALSE; 6994 PetscBool usecpu = PETSC_FALSE; 6995 #else 6996 PetscBool match = PETSC_TRUE; 6997 #endif 6998 6999 PetscFunctionBegin; 7000 MatCheckProduct(mat,1); 7001 #if defined(PETSC_HAVE_DEVICE) 7002 if (!product->A->boundtocpu && !product->B->boundtocpu) { 7003 ierr = PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match);CHKERRQ(ierr); 7004 } 7005 if (match) { /* we can always fallback to the CPU if requested */ 7006 switch (product->type) { 7007 case MATPRODUCT_AB: 7008 if (product->api_user) { 7009 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");CHKERRQ(ierr); 7010 ierr = PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7011 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7012 } else { 7013 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr); 7014 ierr = PetscOptionsBool("-matproduct_ab_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7015 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7016 } 7017 break; 7018 case MATPRODUCT_AtB: 7019 if (product->api_user) { 7020 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr); 7021 ierr = PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7022 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7023 } else { 7024 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr); 7025 ierr = PetscOptionsBool("-matproduct_atb_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7026 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7027 } 7028 break; 7029 case MATPRODUCT_PtAP: 7030 if (product->api_user) { 7031 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");CHKERRQ(ierr); 7032 ierr = PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7033 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7034 } else { 7035 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr); 7036 ierr = PetscOptionsBool("-matproduct_ptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7037 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7038 } 7039 break; 7040 default: 7041 break; 7042 } 7043 match = (PetscBool)!usecpu; 7044 } 7045 #endif 7046 if (match) { 7047 switch (product->type) { 7048 case MATPRODUCT_AB: 7049 case MATPRODUCT_AtB: 7050 case MATPRODUCT_PtAP: 7051 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7052 break; 7053 default: 7054 break; 7055 } 7056 } 7057 /* fallback to MPIAIJ ops */ 7058 if (!mat->ops->productsymbolic) { 7059 ierr = MatProductSetFromOptions_MPIAIJ(mat);CHKERRQ(ierr); 7060 } 7061 PetscFunctionReturn(0); 7062 } 7063
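/*
   Usage sketch (illustration only, not compiled into the library): how the MPIAIJBACKEND product
   path defined above is typically reached. It assumes A and P are already assembled parallel
   matrices of a device subclass (for example MATMPIAIJCUSPARSE), so that
   MatProductSetFromOptions_MPIAIJBACKEND() installs MatProductSymbolic_MPIAIJBACKEND(), which
   builds the intermediate products and the COO pattern later filled by MatProductNumeric_MPIAIJBACKEND().

     Mat C;
     ierr = MatProductCreate(A,P,NULL,&C);CHKERRQ(ierr);          // product object for op(A,P), no extra matrix
     ierr = MatProductSetType(C,MATPRODUCT_PtAP);CHKERRQ(ierr);   // or MATPRODUCT_AB / MATPRODUCT_AtB
     ierr = MatProductSetFromOptions(C);CHKERRQ(ierr);            // honors e.g. -matproduct_ptap_backend_pothbind
     ierr = MatProductSymbolic(C);CHKERRQ(ierr);                  // intermediate products + MatSetPreallocationCOO()
     ierr = MatProductNumeric(C);CHKERRQ(ierr);                   // fills values via MatSetValuesCOO()

   The api_user entry points MatPtAP(), MatMatMult() and MatTransposeMatMult() follow the same path;
   options such as -matptap_backend_cpu, -matmatmult_backend_cpu or -mattransposematmult_backend_cpu
   (see MatProductSetFromOptions_MPIAIJBACKEND() above) fall back to the host MPIAIJ implementation.
*/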