#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes:
   Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) {
    ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
  }
  if (a->B) {
    ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb,*aav,*bav;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia   = a->i;
  ib   = b->i;
  ierr = MatSeqAIJGetArrayRead(mat->A,&aav);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(mat->B,&bav);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = bav + ib[i];
    for (j=0; j
<nb; j++) { 109 if (bb[j] != 0.0) goto ok1; 110 } 111 cnt++; 112 ok1:; 113 } 114 ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRMPI(ierr); 115 if (!n0rows) { 116 ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr); 117 ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 121 cnt = 0; 122 for (i=0; i<m; i++) { 123 na = ia[i+1] - ia[i]; 124 nb = ib[i+1] - ib[i]; 125 if (!na && !nb) continue; 126 aa = aav + ia[i]; 127 for (j=0; j<na;j++) { 128 if (aa[j] != 0.0) { 129 rows[cnt++] = rstart + i; 130 goto ok2; 131 } 132 } 133 bb = bav + ib[i]; 134 for (j=0; j<nb; j++) { 135 if (bb[j] != 0.0) { 136 rows[cnt++] = rstart + i; 137 goto ok2; 138 } 139 } 140 ok2:; 141 } 142 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 143 ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr); 144 ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr); 145 PetscFunctionReturn(0); 146 } 147 148 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 149 { 150 PetscErrorCode ierr; 151 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 152 PetscBool cong; 153 154 PetscFunctionBegin; 155 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 156 if (Y->assembled && cong) { 157 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 158 } else { 159 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 160 } 161 PetscFunctionReturn(0); 162 } 163 164 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 165 { 166 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 167 PetscErrorCode ierr; 168 PetscInt i,rstart,nrows,*rows; 169 170 PetscFunctionBegin; 171 *zrows = NULL; 172 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 173 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 174 for (i=0; i<nrows; i++) rows[i] += rstart; 175 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 176 PetscFunctionReturn(0); 177 } 178 179 PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions) 180 { 181 PetscErrorCode ierr; 182 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 183 PetscInt i,m,n,*garray = aij->garray; 184 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 185 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 186 PetscReal *work; 187 const PetscScalar *dummy; 188 189 PetscFunctionBegin; 190 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 191 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 192 ierr = MatSeqAIJGetArrayRead(aij->A,&dummy);CHKERRQ(ierr); 193 ierr = MatSeqAIJRestoreArrayRead(aij->A,&dummy);CHKERRQ(ierr); 194 ierr = MatSeqAIJGetArrayRead(aij->B,&dummy);CHKERRQ(ierr); 195 ierr = MatSeqAIJRestoreArrayRead(aij->B,&dummy);CHKERRQ(ierr); 196 if (type == NORM_2) { 197 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 198 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 199 } 200 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 201 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 202 } 203 } else if (type == NORM_1) { 204 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 205 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 206 } 207 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 208 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 209 } 210 } else if (type == NORM_INFINITY) { 211 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 212 work[A->cmap->rstart + 
a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 213 } 214 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 215 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 216 } 217 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 218 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 219 work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 220 } 221 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 222 work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 223 } 224 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 225 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 226 work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 227 } 228 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 229 work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 230 } 231 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type"); 232 if (type == NORM_INFINITY) { 233 ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 234 } else { 235 ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 236 } 237 ierr = PetscFree(work);CHKERRQ(ierr); 238 if (type == NORM_2) { 239 for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 240 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 241 for (i=0; i<n; i++) reductions[i] /= m; 242 } 243 PetscFunctionReturn(0); 244 } 245 246 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 247 { 248 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 249 IS sis,gis; 250 PetscErrorCode ierr; 251 const PetscInt *isis,*igis; 252 PetscInt n,*iis,nsis,ngis,rstart,i; 253 254 PetscFunctionBegin; 255 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 256 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 257 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 258 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 259 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 260 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 261 262 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 263 ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr); 264 ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr); 265 n = ngis + nsis; 266 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 267 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 268 for (i=0; i<n; i++) iis[i] += rstart; 269 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 270 271 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 272 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 273 ierr = ISDestroy(&sis);CHKERRQ(ierr); 274 ierr = ISDestroy(&gis);CHKERRQ(ierr); 275 PetscFunctionReturn(0); 276 } 277 278 /* 279 Local utility routine that creates a mapping from the global column 280 number to the local number in the off-diagonal part of the local 281 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 282 a slightly higher hash table cost; without it it is not scalable (each processor 283 has an order N integer array but is fast to access. 
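   For example (illustrative values): if garray = {3,7,12}, the colmap records 3->1, 7->2, 12->3,
   i.e. the local off-diagonal column index plus one, so that a lookup result of 0 means "column not
   present". This mirrors the lookups already done in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ():
     PetscTableFind(aij->colmap,gcol+1,&col); col--;      (with PETSC_USE_CTABLE)
     col = aij->colmap[gcol] - 1;                         (without PETSC_USE_CTABLE)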
284 */ 285 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 286 { 287 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 288 PetscErrorCode ierr; 289 PetscInt n = aij->B->cmap->n,i; 290 291 PetscFunctionBegin; 292 if (n && !aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 293 #if defined(PETSC_USE_CTABLE) 294 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 295 for (i=0; i<n; i++) { 296 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 297 } 298 #else 299 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 300 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 301 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 302 #endif 303 PetscFunctionReturn(0); 304 } 305 306 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 307 { \ 308 if (col <= lastcol1) low1 = 0; \ 309 else high1 = nrow1; \ 310 lastcol1 = col;\ 311 while (high1-low1 > 5) { \ 312 t = (low1+high1)/2; \ 313 if (rp1[t] > col) high1 = t; \ 314 else low1 = t; \ 315 } \ 316 for (_i=low1; _i<high1; _i++) { \ 317 if (rp1[_i] > col) break; \ 318 if (rp1[_i] == col) { \ 319 if (addv == ADD_VALUES) { \ 320 ap1[_i] += value; \ 321 /* Not sure LogFlops will slow dow the code or not */ \ 322 (void)PetscLogFlops(1.0); \ 323 } \ 324 else ap1[_i] = value; \ 325 inserted = PETSC_TRUE; \ 326 goto a_noinsert; \ 327 } \ 328 } \ 329 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 330 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 331 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 332 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 333 N = nrow1++ - 1; a->nz++; high1++; \ 334 /* shift up all the later entries in this row */ \ 335 ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\ 336 ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\ 337 rp1[_i] = col; \ 338 ap1[_i] = value; \ 339 A->nonzerostate++;\ 340 a_noinsert: ; \ 341 ailen[row] = nrow1; \ 342 } 343 344 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 345 { \ 346 if (col <= lastcol2) low2 = 0; \ 347 else high2 = nrow2; \ 348 lastcol2 = col; \ 349 while (high2-low2 > 5) { \ 350 t = (low2+high2)/2; \ 351 if (rp2[t] > col) high2 = t; \ 352 else low2 = t; \ 353 } \ 354 for (_i=low2; _i<high2; _i++) { \ 355 if (rp2[_i] > col) break; \ 356 if (rp2[_i] == col) { \ 357 if (addv == ADD_VALUES) { \ 358 ap2[_i] += value; \ 359 (void)PetscLogFlops(1.0); \ 360 } \ 361 else ap2[_i] = value; \ 362 inserted = PETSC_TRUE; \ 363 goto b_noinsert; \ 364 } \ 365 } \ 366 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 367 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 368 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 369 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 370 N = nrow2++ - 1; b->nz++; high2++; \ 371 /* shift up all the later entries in this row */ \ 372 ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\ 373 ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\ 374 rp2[_i] = col; \ 375 ap2[_i] = value; \ 376 B->nonzerostate++; \ 377 b_noinsert: ; \ 
378 bilen[row] = nrow2; \ 379 } 380 381 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 382 { 383 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 384 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 385 PetscErrorCode ierr; 386 PetscInt l,*garray = mat->garray,diag; 387 388 PetscFunctionBegin; 389 /* code only works for square matrices A */ 390 391 /* find size of row to the left of the diagonal part */ 392 ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr); 393 row = row - diag; 394 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 395 if (garray[b->j[b->i[row]+l]] > diag) break; 396 } 397 ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr); 398 399 /* diagonal part */ 400 ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr); 401 402 /* right of diagonal part */ 403 ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr); 404 #if defined(PETSC_HAVE_DEVICE) 405 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU; 406 #endif 407 PetscFunctionReturn(0); 408 } 409 410 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 411 { 412 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 413 PetscScalar value = 0.0; 414 PetscErrorCode ierr; 415 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 416 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 417 PetscBool roworiented = aij->roworiented; 418 419 /* Some Variables required in the macro */ 420 Mat A = aij->A; 421 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 422 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 423 PetscBool ignorezeroentries = a->ignorezeroentries; 424 Mat B = aij->B; 425 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 426 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 427 MatScalar *aa,*ba; 428 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 429 * cannot use "#if defined" inside a macro. 
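   After each macro invocation the flag is consulted (under PETSC_HAVE_DEVICE) to decide whether the
   offload mask of the corresponding block has to be set to PETSC_OFFLOAD_CPU.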
*/ 430 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 431 432 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 433 PetscInt nonew; 434 MatScalar *ap1,*ap2; 435 436 PetscFunctionBegin; 437 #if defined(PETSC_HAVE_DEVICE) 438 if (A->offloadmask == PETSC_OFFLOAD_GPU) { 439 const PetscScalar *dummy; 440 ierr = MatSeqAIJGetArrayRead(A,&dummy);CHKERRQ(ierr); 441 ierr = MatSeqAIJRestoreArrayRead(A,&dummy);CHKERRQ(ierr); 442 } 443 if (B->offloadmask == PETSC_OFFLOAD_GPU) { 444 const PetscScalar *dummy; 445 ierr = MatSeqAIJGetArrayRead(B,&dummy);CHKERRQ(ierr); 446 ierr = MatSeqAIJRestoreArrayRead(B,&dummy);CHKERRQ(ierr); 447 } 448 #endif 449 aa = a->a; 450 ba = b->a; 451 for (i=0; i<m; i++) { 452 if (im[i] < 0) continue; 453 if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 454 if (im[i] >= rstart && im[i] < rend) { 455 row = im[i] - rstart; 456 lastcol1 = -1; 457 rp1 = aj + ai[row]; 458 ap1 = aa + ai[row]; 459 rmax1 = aimax[row]; 460 nrow1 = ailen[row]; 461 low1 = 0; 462 high1 = nrow1; 463 lastcol2 = -1; 464 rp2 = bj + bi[row]; 465 ap2 = ba + bi[row]; 466 rmax2 = bimax[row]; 467 nrow2 = bilen[row]; 468 low2 = 0; 469 high2 = nrow2; 470 471 for (j=0; j<n; j++) { 472 if (v) value = roworiented ? v[i*n+j] : v[i+j*m]; 473 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 474 if (in[j] >= cstart && in[j] < cend) { 475 col = in[j] - cstart; 476 nonew = a->nonew; 477 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 478 #if defined(PETSC_HAVE_DEVICE) 479 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 480 #endif 481 } else if (in[j] < 0) continue; 482 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 483 else { 484 if (mat->was_assembled) { 485 if (!aij->colmap) { 486 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 487 } 488 #if defined(PETSC_USE_CTABLE) 489 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 490 col--; 491 #else 492 col = aij->colmap[in[j]] - 1; 493 #endif 494 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 495 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 496 col = in[j]; 497 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 498 B = aij->B; 499 b = (Mat_SeqAIJ*)B->data; 500 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 501 rp2 = bj + bi[row]; 502 ap2 = ba + bi[row]; 503 rmax2 = bimax[row]; 504 nrow2 = bilen[row]; 505 low2 = 0; 506 high2 = nrow2; 507 bm = aij->B->rmap->n; 508 ba = b->a; 509 inserted = PETSC_FALSE; 510 } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 511 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 512 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 513 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 514 } 515 } else col = in[j]; 516 nonew = b->nonew; 517 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 518 #if defined(PETSC_HAVE_DEVICE) 519 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 520 #endif 521 } 522 } 523 } else { 524 if (mat->nooffprocentries) 
SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        A    = aij->A;                   /* diagonal part of the matrix */
  Mat        B    = aij->B;                   /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am     = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz;
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ() has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ*)mat->data;
  Mat        A     = aij->A;                  /* diagonal part of the matrix */
  Mat        B     = aij->B;                  /* off-diagonal part of the matrix */
  Mat_SeqAIJ *aijd = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am     = aij->A->rmap->n,j;
  PetscInt   *full_diag_i = aijd->i,*full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point.
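     full_diag_i and full_offd_i are the row pointer (i) arrays of the preallocated diagonal and
     off-diagonal SeqAIJ blocks; each row below is filled starting at these offsets.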
*/ 598 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 599 PetscScalar *aa = a->a,*ba = b->a; 600 601 PetscFunctionBegin; 602 /* Iterate over all rows of the matrix */ 603 for (j=0; j<am; j++) { 604 dnz_row = onz_row = 0; 605 rowstart_offd = full_offd_i[j]; 606 rowstart_diag = full_diag_i[j]; 607 /* Iterate over all non-zero columns of the current row */ 608 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 609 /* If column is in the diagonal */ 610 if (mat_j[col] >= cstart && mat_j[col] < cend) { 611 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 612 aa[rowstart_diag+dnz_row] = mat_a[col]; 613 dnz_row++; 614 } else { /* off-diagonal entries */ 615 bj[rowstart_offd+onz_row] = mat_j[col]; 616 ba[rowstart_offd+onz_row] = mat_a[col]; 617 onz_row++; 618 } 619 } 620 ailen[j] = dnz_row; 621 bilen[j] = onz_row; 622 } 623 PetscFunctionReturn(0); 624 } 625 626 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 627 { 628 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 629 PetscErrorCode ierr; 630 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 631 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 632 633 PetscFunctionBegin; 634 for (i=0; i<m; i++) { 635 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 636 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 637 if (idxm[i] >= rstart && idxm[i] < rend) { 638 row = idxm[i] - rstart; 639 for (j=0; j<n; j++) { 640 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 641 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 642 if (idxn[j] >= cstart && idxn[j] < cend) { 643 col = idxn[j] - cstart; 644 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 645 } else { 646 if (!aij->colmap) { 647 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 648 } 649 #if defined(PETSC_USE_CTABLE) 650 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 651 col--; 652 #else 653 col = aij->colmap[idxn[j]] - 1; 654 #endif 655 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 656 else { 657 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 658 } 659 } 660 } 661 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 662 } 663 PetscFunctionReturn(0); 664 } 665 666 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 667 { 668 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 669 PetscErrorCode ierr; 670 PetscInt nstash,reallocs; 671 672 PetscFunctionBegin; 673 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 674 675 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 676 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 677 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 678 PetscFunctionReturn(0); 679 } 680 681 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 682 { 683 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 684 PetscErrorCode ierr; 685 PetscMPIInt n; 686 PetscInt i,j,rstart,ncols,flg; 687 PetscInt *row,*col; 688 PetscBool other_disassembled; 689 PetscScalar *val; 690 691 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be 
reset in disassembly */ 692 693 PetscFunctionBegin; 694 if (!aij->donotstash && !mat->nooffprocentries) { 695 while (1) { 696 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 697 if (!flg) break; 698 699 for (i=0; i<n;) { 700 /* Now identify the consecutive vals belonging to the same row */ 701 for (j=i,rstart=row[j]; j<n; j++) { 702 if (row[j] != rstart) break; 703 } 704 if (j < n) ncols = j-i; 705 else ncols = n-i; 706 /* Now assemble all these values with a single function call */ 707 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 708 i = j; 709 } 710 } 711 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 712 } 713 #if defined(PETSC_HAVE_DEVICE) 714 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 715 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 716 if (mat->boundtocpu) { 717 ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr); 718 ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr); 719 } 720 #endif 721 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 722 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 723 724 /* determine if any processor has disassembled, if so we must 725 also disassemble ourself, in order that we may reassemble. */ 726 /* 727 if nonzero structure of submatrix B cannot change then we know that 728 no processor disassembled thus we can skip this stuff 729 */ 730 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 731 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 732 if (mat->was_assembled && !other_disassembled) { 733 #if defined(PETSC_HAVE_DEVICE) 734 aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */ 735 #endif 736 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 737 } 738 } 739 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 740 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 741 } 742 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 743 #if defined(PETSC_HAVE_DEVICE) 744 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 745 #endif 746 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 747 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 748 749 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 750 751 aij->rowvalues = NULL; 752 753 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 754 755 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 756 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 757 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 758 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 759 } 760 #if defined(PETSC_HAVE_DEVICE) 761 mat->offloadmask = PETSC_OFFLOAD_BOTH; 762 #endif 763 PetscFunctionReturn(0); 764 } 765 766 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 767 { 768 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 769 PetscErrorCode ierr; 770 771 PetscFunctionBegin; 772 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 773 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 774 PetscFunctionReturn(0); 775 } 776 777 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const 
PetscInt rows[],PetscScalar diag,Vec x,Vec b) 778 { 779 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 780 PetscObjectState sA, sB; 781 PetscInt *lrows; 782 PetscInt r, len; 783 PetscBool cong, lch, gch; 784 PetscErrorCode ierr; 785 786 PetscFunctionBegin; 787 /* get locally owned rows */ 788 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 789 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 790 /* fix right hand side if needed */ 791 if (x && b) { 792 const PetscScalar *xx; 793 PetscScalar *bb; 794 795 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 796 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 797 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 798 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 799 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 800 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 801 } 802 803 sA = mat->A->nonzerostate; 804 sB = mat->B->nonzerostate; 805 806 if (diag != 0.0 && cong) { 807 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 808 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 809 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 810 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 811 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 812 PetscInt nnwA, nnwB; 813 PetscBool nnzA, nnzB; 814 815 nnwA = aijA->nonew; 816 nnwB = aijB->nonew; 817 nnzA = aijA->keepnonzeropattern; 818 nnzB = aijB->keepnonzeropattern; 819 if (!nnzA) { 820 ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr); 821 aijA->nonew = 0; 822 } 823 if (!nnzB) { 824 ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr); 825 aijB->nonew = 0; 826 } 827 /* Must zero here before the next loop */ 828 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 829 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 830 for (r = 0; r < len; ++r) { 831 const PetscInt row = lrows[r] + A->rmap->rstart; 832 if (row >= A->cmap->N) continue; 833 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 834 } 835 aijA->nonew = nnwA; 836 aijB->nonew = nnwB; 837 } else { 838 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 839 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 840 } 841 ierr = PetscFree(lrows);CHKERRQ(ierr); 842 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 843 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 844 845 /* reduce nonzerostate */ 846 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 847 ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 848 if (gch) A->nonzerostate++; 849 PetscFunctionReturn(0); 850 } 851 852 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 853 { 854 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 855 PetscErrorCode ierr; 856 PetscMPIInt n = A->rmap->n; 857 PetscInt i,j,r,m,len = 0; 858 PetscInt *lrows,*owners = A->rmap->range; 859 PetscMPIInt p = 0; 860 PetscSFNode *rrows; 861 PetscSF sf; 862 const PetscScalar *xx; 863 PetscScalar *bb,*mask; 864 Vec xmask,lmask; 865 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 866 const PetscInt 
*aj, *ii,*ridx; 867 PetscScalar *aa; 868 869 PetscFunctionBegin; 870 /* Create SF where leaves are input rows and roots are owned rows */ 871 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 872 for (r = 0; r < n; ++r) lrows[r] = -1; 873 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 874 for (r = 0; r < N; ++r) { 875 const PetscInt idx = rows[r]; 876 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 877 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 878 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 879 } 880 rrows[r].rank = p; 881 rrows[r].index = rows[r] - owners[p]; 882 } 883 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 884 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 885 /* Collect flags for rows to be zeroed */ 886 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 887 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 888 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 889 /* Compress and put in row numbers */ 890 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 891 /* zero diagonal part of matrix */ 892 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 893 /* handle off diagonal part of matrix */ 894 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 895 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 896 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 897 for (i=0; i<len; i++) bb[lrows[i]] = 1; 898 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 899 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 900 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 901 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 902 if (x && b) { /* this code is buggy when the row and column layout don't match */ 903 PetscBool cong; 904 905 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 906 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 907 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 908 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 909 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 910 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 911 } 912 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 913 /* remove zeroed rows of off diagonal matrix */ 914 ii = aij->i; 915 for (i=0; i<len; i++) { 916 ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr); 917 } 918 /* loop over all elements of off process part of matrix zeroing removed columns*/ 919 if (aij->compressedrow.use) { 920 m = aij->compressedrow.nrows; 921 ii = aij->compressedrow.i; 922 ridx = aij->compressedrow.rindex; 923 for (i=0; i<m; i++) { 924 n = ii[i+1] - ii[i]; 925 aj = aij->j + ii[i]; 926 aa = aij->a + ii[i]; 927 928 for (j=0; j<n; j++) { 929 if (PetscAbsScalar(mask[*aj])) { 930 if (b) bb[*ridx] -= *aa*xx[*aj]; 931 *aa = 0.0; 932 } 933 aa++; 934 aj++; 935 } 936 ridx++; 937 } 938 } else { /* do not use compressed row format */ 939 m = l->B->rmap->n; 940 for (i=0; i<m; i++) { 941 n = ii[i+1] - ii[i]; 942 aj = aij->j + ii[i]; 943 aa = aij->a + ii[i]; 944 for (j=0; j<n; j++) { 945 if (PetscAbsScalar(mask[*aj])) { 946 if (b) bb[i] -= *aa*xx[*aj]; 947 *aa = 0.0; 948 } 949 aa++; 950 aj++; 951 } 952 
} 953 } 954 if (x && b) { 955 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 956 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 957 } 958 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 959 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 960 ierr = PetscFree(lrows);CHKERRQ(ierr); 961 962 /* only change matrix nonzero state if pattern was allowed to be changed */ 963 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 964 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 965 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 966 } 967 PetscFunctionReturn(0); 968 } 969 970 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 971 { 972 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 973 PetscErrorCode ierr; 974 PetscInt nt; 975 VecScatter Mvctx = a->Mvctx; 976 977 PetscFunctionBegin; 978 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 979 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 980 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 981 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 982 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 983 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 984 PetscFunctionReturn(0); 985 } 986 987 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 988 { 989 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 990 PetscErrorCode ierr; 991 992 PetscFunctionBegin; 993 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 994 PetscFunctionReturn(0); 995 } 996 997 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 998 { 999 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1000 PetscErrorCode ierr; 1001 VecScatter Mvctx = a->Mvctx; 1002 1003 PetscFunctionBegin; 1004 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1005 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1006 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1007 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1008 PetscFunctionReturn(0); 1009 } 1010 1011 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1012 { 1013 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1014 PetscErrorCode ierr; 1015 1016 PetscFunctionBegin; 1017 /* do nondiagonal part */ 1018 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1019 /* do local part */ 1020 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1021 /* add partial results together */ 1022 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1023 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1024 PetscFunctionReturn(0); 1025 } 1026 1027 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1028 { 1029 MPI_Comm comm; 1030 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1031 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1032 IS Me,Notme; 1033 PetscErrorCode ierr; 1034 PetscInt M,N,first,last,*notme,i; 1035 PetscBool lf; 1036 PetscMPIInt size; 1037 1038 PetscFunctionBegin; 1039 /* Easy test: symmetric diagonal block */ 1040 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1041 ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 1042 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRMPI(ierr); 1043 if (!*f) PetscFunctionReturn(0); 1044 ierr = 
PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1045 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 1046 if (size == 1) PetscFunctionReturn(0); 1047 1048 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1049 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1050 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1051 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1052 for (i=0; i<first; i++) notme[i] = i; 1053 for (i=last; i<M; i++) notme[i-last+first] = i; 1054 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1055 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1056 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1057 Aoff = Aoffs[0]; 1058 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1059 Boff = Boffs[0]; 1060 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1061 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1062 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1063 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1064 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1065 ierr = PetscFree(notme);CHKERRQ(ierr); 1066 PetscFunctionReturn(0); 1067 } 1068 1069 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1070 { 1071 PetscErrorCode ierr; 1072 1073 PetscFunctionBegin; 1074 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1075 PetscFunctionReturn(0); 1076 } 1077 1078 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1079 { 1080 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1081 PetscErrorCode ierr; 1082 1083 PetscFunctionBegin; 1084 /* do nondiagonal part */ 1085 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1086 /* do local part */ 1087 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1088 /* add partial results together */ 1089 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1090 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1091 PetscFunctionReturn(0); 1092 } 1093 1094 /* 1095 This only works correctly for square matrices where the subblock A->A is the 1096 diagonal block 1097 */ 1098 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1099 { 1100 PetscErrorCode ierr; 1101 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1102 1103 PetscFunctionBegin; 1104 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1105 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1106 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1107 PetscFunctionReturn(0); 1108 } 1109 1110 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1111 { 1112 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1113 PetscErrorCode ierr; 1114 1115 PetscFunctionBegin; 1116 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1117 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1118 PetscFunctionReturn(0); 1119 } 1120 1121 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1122 { 1123 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1124 PetscErrorCode ierr; 1125 1126 PetscFunctionBegin; 1127 #if defined(PETSC_USE_LOG) 1128 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1129 #endif 1130 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1131 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1132 ierr = 
MatDestroy(&aij->A);CHKERRQ(ierr); 1133 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1134 #if defined(PETSC_USE_CTABLE) 1135 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1136 #else 1137 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1138 #endif 1139 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1140 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1141 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1142 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1143 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1144 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1145 1146 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 1147 ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr); 1148 1149 ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr); 1150 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1151 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1152 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1153 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1154 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1155 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1156 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1157 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr); 1158 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1159 #if defined(PETSC_HAVE_CUDA) 1160 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr); 1161 #endif 1162 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 1163 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr); 1164 #endif 1165 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 1166 #if defined(PETSC_HAVE_ELEMENTAL) 1167 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1168 #endif 1169 #if defined(PETSC_HAVE_SCALAPACK) 1170 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr); 1171 #endif 1172 #if defined(PETSC_HAVE_HYPRE) 1173 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1174 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1175 #endif 1176 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1177 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr); 1178 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1179 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr); 1180 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr); 1181 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr); 1182 #if defined(PETSC_HAVE_MKL_SPARSE) 1183 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr); 1184 #endif 1185 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr); 1186 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1187 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr); 1188 PetscFunctionReturn(0); 1189 } 1190 1191 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1192 { 1193 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1194 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1195 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1196 const PetscInt *garray = aij->garray; 1197 const PetscScalar *aa,*ba; 1198 PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb; 1199 PetscInt *rowlens; 1200 PetscInt *colidxs; 1201 PetscScalar *matvals; 1202 PetscErrorCode ierr; 1203 1204 PetscFunctionBegin; 1205 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1206 1207 M = mat->rmap->N; 1208 N = mat->cmap->N; 1209 m = mat->rmap->n; 1210 rs = mat->rmap->rstart; 1211 cs = mat->cmap->rstart; 1212 nz = A->nz + B->nz; 1213 1214 /* write matrix header */ 1215 header[0] = MAT_FILE_CLASSID; 1216 header[1] = M; header[2] = N; header[3] = nz; 1217 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1218 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1219 1220 /* fill in and store row lengths */ 1221 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1222 for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1223 ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr); 1224 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1225 1226 /* fill in and store column indices */ 1227 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1228 for (cnt=0, i=0; i<m; i++) { 1229 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1230 if (garray[B->j[jb]] > cs) break; 1231 colidxs[cnt++] = garray[B->j[jb]]; 1232 } 1233 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1234 colidxs[cnt++] = A->j[ja] + cs; 1235 for (; jb<B->i[i+1]; jb++) 1236 colidxs[cnt++] = garray[B->j[jb]]; 1237 } 1238 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1239 ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 1240 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1241 1242 /* fill in and store nonzero values */ 1243 ierr = MatSeqAIJGetArrayRead(aij->A,&aa);CHKERRQ(ierr); 1244 ierr = MatSeqAIJGetArrayRead(aij->B,&ba);CHKERRQ(ierr); 1245 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1246 for (cnt=0, i=0; i<m; i++) { 1247 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1248 if (garray[B->j[jb]] > cs) break; 1249 matvals[cnt++] = ba[jb]; 1250 } 1251 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1252 matvals[cnt++] = aa[ja]; 1253 for (; jb<B->i[i+1]; jb++) 1254 matvals[cnt++] = ba[jb]; 1255 } 1256 ierr = MatSeqAIJRestoreArrayRead(aij->A,&aa);CHKERRQ(ierr); 1257 ierr = MatSeqAIJRestoreArrayRead(aij->B,&ba);CHKERRQ(ierr); 1258 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1259 ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 1260 ierr = PetscFree(matvals);CHKERRQ(ierr); 1261 1262 /* write block size option to the viewer's .info file */ 1263 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1264 PetscFunctionReturn(0); 
1265 } 1266 1267 #include <petscdraw.h> 1268 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1269 { 1270 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1271 PetscErrorCode ierr; 1272 PetscMPIInt rank = aij->rank,size = aij->size; 1273 PetscBool isdraw,iascii,isbinary; 1274 PetscViewer sviewer; 1275 PetscViewerFormat format; 1276 1277 PetscFunctionBegin; 1278 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1279 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1280 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1281 if (iascii) { 1282 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1283 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1284 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1285 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1286 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1287 for (i=0; i<(PetscInt)size; i++) { 1288 nmax = PetscMax(nmax,nz[i]); 1289 nmin = PetscMin(nmin,nz[i]); 1290 navg += nz[i]; 1291 } 1292 ierr = PetscFree(nz);CHKERRQ(ierr); 1293 navg = navg/size; 1294 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1295 PetscFunctionReturn(0); 1296 } 1297 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1298 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1299 MatInfo info; 1300 PetscInt *inodes=NULL; 1301 1302 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr); 1303 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1304 ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr); 1305 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1306 if (!inodes) { 1307 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1308 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1309 } else { 1310 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1311 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1312 } 1313 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1314 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1315 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1316 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1317 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1318 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1319 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1320 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1321 PetscFunctionReturn(0); 1322 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1323 PetscInt inodecount,inodelimit,*inodes; 1324 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1325 if (inodes) { 1326 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1327 } else { 1328 ierr = 
PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1329 } 1330 PetscFunctionReturn(0); 1331 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1332 PetscFunctionReturn(0); 1333 } 1334 } else if (isbinary) { 1335 if (size == 1) { 1336 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1337 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1338 } else { 1339 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1340 } 1341 PetscFunctionReturn(0); 1342 } else if (iascii && size == 1) { 1343 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1344 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1345 PetscFunctionReturn(0); 1346 } else if (isdraw) { 1347 PetscDraw draw; 1348 PetscBool isnull; 1349 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1350 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1351 if (isnull) PetscFunctionReturn(0); 1352 } 1353 1354 { /* assemble the entire matrix onto first processor */ 1355 Mat A = NULL, Av; 1356 IS isrow,iscol; 1357 1358 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1359 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1360 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1361 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1362 /* The commented code uses MatCreateSubMatrices instead */ 1363 /* 1364 Mat *AA, A = NULL, Av; 1365 IS isrow,iscol; 1366 1367 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1368 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? 
mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1369 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1370 if (rank == 0) { 1371 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1372 A = AA[0]; 1373 Av = AA[0]; 1374 } 1375 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1376 */ 1377 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1378 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1379 /* 1380 Everyone has to call to draw the matrix since the graphics waits are 1381 synchronized across all processors that share the PetscDraw object 1382 */ 1383 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1384 if (rank == 0) { 1385 if (((PetscObject)mat)->name) { 1386 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1387 } 1388 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1389 } 1390 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1391 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1392 ierr = MatDestroy(&A);CHKERRQ(ierr); 1393 } 1394 PetscFunctionReturn(0); 1395 } 1396 1397 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1398 { 1399 PetscErrorCode ierr; 1400 PetscBool iascii,isdraw,issocket,isbinary; 1401 1402 PetscFunctionBegin; 1403 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1404 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1405 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1406 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1407 if (iascii || isdraw || isbinary || issocket) { 1408 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1409 } 1410 PetscFunctionReturn(0); 1411 } 1412 1413 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1414 { 1415 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1416 PetscErrorCode ierr; 1417 Vec bb1 = NULL; 1418 PetscBool hasop; 1419 1420 PetscFunctionBegin; 1421 if (flag == SOR_APPLY_UPPER) { 1422 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1423 PetscFunctionReturn(0); 1424 } 1425 1426 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1427 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1428 } 1429 1430 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1431 if (flag & SOR_ZERO_INITIAL_GUESS) { 1432 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1433 its--; 1434 } 1435 1436 while (its--) { 1437 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1438 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1439 1440 /* update rhs: bb1 = bb - B*x */ 1441 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1442 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1443 1444 /* local sweep */ 1445 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1446 } 1447 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1448 if (flag & SOR_ZERO_INITIAL_GUESS) { 1449 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1450 its--; 1451 } 1452 while (its--) { 1453 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1454 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1455 1456 /* update rhs: bb1 = bb - B*x */ 1457 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1458 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1459 1460 /* local sweep */ 1461 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1462 } 1463 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1464 if (flag & SOR_ZERO_INITIAL_GUESS) { 1465 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1466 its--; 1467 } 1468 while (its--) { 1469 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1470 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1471 1472 /* update rhs: bb1 = bb - B*x */ 1473 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1474 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1475 1476 /* local sweep */ 1477 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1478 } 1479 } else if (flag & SOR_EISENSTAT) { 1480 Vec xx1; 1481 1482 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1483 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1484 1485 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1486 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1487 if (!mat->diag) { 1488 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1489 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1490 } 1491 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1492 if (hasop) { 1493 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1494 } else { 1495 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1496 } 1497 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1498 1499 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1500 1501 /* local sweep */ 1502 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1503 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1504 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1505 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1506 1507 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1508 1509 matin->factorerrortype = mat->A->factorerrortype; 1510 PetscFunctionReturn(0); 1511 } 1512 1513 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1514 { 1515 Mat aA,aB,Aperm; 1516 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1517 PetscScalar *aa,*ba; 1518 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1519 PetscSF rowsf,sf; 1520 IS parcolp = NULL; 1521 PetscBool done; 1522 PetscErrorCode ierr; 1523 1524 PetscFunctionBegin; 1525 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1526 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1527 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1528 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1529 1530 /* Invert row permutation to find out where my rows should go */ 1531 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1532 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1533 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 
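  /*
     Note (descriptive sketch of the step below): the PetscSFReduce() with MPI_REPLACE inverts the
     map described by rwant[].  Each leaf i of rowsf points at root rwant[i] and carries its own
     global row index (work[i] = rstart + i), so the reduce deposits at every locally owned root
     the global index of the leaf that targets it; rdest[i] then gives the global row of the
     permuted matrix that receives locally owned row i.  The same pattern is reused below for the
     column permutation and for the ghost columns (gcols).
  */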
1534 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1535 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1536 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1537 1538 /* Invert column permutation to find out where my columns should go */ 1539 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1540 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1541 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1542 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1543 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1544 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1545 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1546 1547 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1548 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1549 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1550 1551 /* Find out where my gcols should go */ 1552 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1553 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1554 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1555 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1556 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1557 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr); 1558 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr); 1559 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1560 1561 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1562 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1563 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1564 for (i=0; i<m; i++) { 1565 PetscInt row = rdest[i]; 1566 PetscMPIInt rowner; 1567 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1568 for (j=ai[i]; j<ai[i+1]; j++) { 1569 PetscInt col = cdest[aj[j]]; 1570 PetscMPIInt cowner; 1571 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1572 if (rowner == cowner) dnnz[i]++; 1573 else onnz[i]++; 1574 } 1575 for (j=bi[i]; j<bi[i+1]; j++) { 1576 PetscInt col = gcdest[bj[j]]; 1577 PetscMPIInt cowner; 1578 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1579 if (rowner == cowner) dnnz[i]++; 1580 else onnz[i]++; 1581 } 1582 } 1583 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr); 1584 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr); 1585 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr); 1586 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr); 1587 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1588 1589 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1590 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1591 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1592 for (i=0; i<m; i++) { 1593 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1594 PetscInt j0,rowlen; 1595 rowlen = ai[i+1] - ai[i]; 1596 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1597 for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1598 ierr = 
MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1599 } 1600 rowlen = bi[i+1] - bi[i]; 1601 for (j0=j=0; j<rowlen; j0=j) { 1602 for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1603 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1604 } 1605 } 1606 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1607 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1608 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1609 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1610 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1611 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1612 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1613 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1614 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1615 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1616 *B = Aperm; 1617 PetscFunctionReturn(0); 1618 } 1619 1620 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1621 { 1622 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1623 PetscErrorCode ierr; 1624 1625 PetscFunctionBegin; 1626 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1627 if (ghosts) *ghosts = aij->garray; 1628 PetscFunctionReturn(0); 1629 } 1630 1631 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1632 { 1633 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1634 Mat A = mat->A,B = mat->B; 1635 PetscErrorCode ierr; 1636 PetscLogDouble isend[5],irecv[5]; 1637 1638 PetscFunctionBegin; 1639 info->block_size = 1.0; 1640 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1641 1642 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1643 isend[3] = info->memory; isend[4] = info->mallocs; 1644 1645 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1646 1647 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1648 isend[3] += info->memory; isend[4] += info->mallocs; 1649 if (flag == MAT_LOCAL) { 1650 info->nz_used = isend[0]; 1651 info->nz_allocated = isend[1]; 1652 info->nz_unneeded = isend[2]; 1653 info->memory = isend[3]; 1654 info->mallocs = isend[4]; 1655 } else if (flag == MAT_GLOBAL_MAX) { 1656 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr); 1657 1658 info->nz_used = irecv[0]; 1659 info->nz_allocated = irecv[1]; 1660 info->nz_unneeded = irecv[2]; 1661 info->memory = irecv[3]; 1662 info->mallocs = irecv[4]; 1663 } else if (flag == MAT_GLOBAL_SUM) { 1664 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr); 1665 1666 info->nz_used = irecv[0]; 1667 info->nz_allocated = irecv[1]; 1668 info->nz_unneeded = irecv[2]; 1669 info->memory = irecv[3]; 1670 info->mallocs = irecv[4]; 1671 } 1672 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1673 info->fill_ratio_needed = 0; 1674 info->factor_mallocs = 0; 1675 PetscFunctionReturn(0); 1676 } 1677 1678 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1679 { 1680 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1681 PetscErrorCode ierr; 1682 1683 PetscFunctionBegin; 1684 switch (op) { 1685 case MAT_NEW_NONZERO_LOCATIONS: 1686 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1687 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1688 case MAT_KEEP_NONZERO_PATTERN: 1689 case 
MAT_NEW_NONZERO_LOCATION_ERR: 1690 case MAT_USE_INODES: 1691 case MAT_IGNORE_ZERO_ENTRIES: 1692 case MAT_FORM_EXPLICIT_TRANSPOSE: 1693 MatCheckPreallocated(A,1); 1694 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1695 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1696 break; 1697 case MAT_ROW_ORIENTED: 1698 MatCheckPreallocated(A,1); 1699 a->roworiented = flg; 1700 1701 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1702 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1703 break; 1704 case MAT_FORCE_DIAGONAL_ENTRIES: 1705 case MAT_SORTED_FULL: 1706 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1707 break; 1708 case MAT_IGNORE_OFF_PROC_ENTRIES: 1709 a->donotstash = flg; 1710 break; 1711 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1712 case MAT_SPD: 1713 case MAT_SYMMETRIC: 1714 case MAT_STRUCTURALLY_SYMMETRIC: 1715 case MAT_HERMITIAN: 1716 case MAT_SYMMETRY_ETERNAL: 1717 break; 1718 case MAT_SUBMAT_SINGLEIS: 1719 A->submat_singleis = flg; 1720 break; 1721 case MAT_STRUCTURE_ONLY: 1722 /* The option is handled directly by MatSetOption() */ 1723 break; 1724 default: 1725 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1726 } 1727 PetscFunctionReturn(0); 1728 } 1729 1730 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1731 { 1732 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1733 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1734 PetscErrorCode ierr; 1735 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1736 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1737 PetscInt *cmap,*idx_p; 1738 1739 PetscFunctionBegin; 1740 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1741 mat->getrowactive = PETSC_TRUE; 1742 1743 if (!mat->rowvalues && (idx || v)) { 1744 /* 1745 allocate enough space to hold information from the longest row. 
1746 */ 1747 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1748 PetscInt max = 1,tmp; 1749 for (i=0; i<matin->rmap->n; i++) { 1750 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1751 if (max < tmp) max = tmp; 1752 } 1753 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1754 } 1755 1756 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1757 lrow = row - rstart; 1758 1759 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1760 if (!v) {pvA = NULL; pvB = NULL;} 1761 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1762 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1763 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1764 nztot = nzA + nzB; 1765 1766 cmap = mat->garray; 1767 if (v || idx) { 1768 if (nztot) { 1769 /* Sort by increasing column numbers, assuming A and B already sorted */ 1770 PetscInt imark = -1; 1771 if (v) { 1772 *v = v_p = mat->rowvalues; 1773 for (i=0; i<nzB; i++) { 1774 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1775 else break; 1776 } 1777 imark = i; 1778 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1779 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1780 } 1781 if (idx) { 1782 *idx = idx_p = mat->rowindices; 1783 if (imark > -1) { 1784 for (i=0; i<imark; i++) { 1785 idx_p[i] = cmap[cworkB[i]]; 1786 } 1787 } else { 1788 for (i=0; i<nzB; i++) { 1789 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1790 else break; 1791 } 1792 imark = i; 1793 } 1794 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1795 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1796 } 1797 } else { 1798 if (idx) *idx = NULL; 1799 if (v) *v = NULL; 1800 } 1801 } 1802 *nz = nztot; 1803 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1804 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1805 PetscFunctionReturn(0); 1806 } 1807 1808 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1809 { 1810 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1811 1812 PetscFunctionBegin; 1813 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1814 aij->getrowactive = PETSC_FALSE; 1815 PetscFunctionReturn(0); 1816 } 1817 1818 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1819 { 1820 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1821 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1822 PetscErrorCode ierr; 1823 PetscInt i,j,cstart = mat->cmap->rstart; 1824 PetscReal sum = 0.0; 1825 MatScalar *v; 1826 1827 PetscFunctionBegin; 1828 if (aij->size == 1) { 1829 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1830 } else { 1831 if (type == NORM_FROBENIUS) { 1832 v = amat->a; 1833 for (i=0; i<amat->nz; i++) { 1834 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1835 } 1836 v = bmat->a; 1837 for (i=0; i<bmat->nz; i++) { 1838 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1839 } 1840 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1841 *norm = PetscSqrtReal(*norm); 1842 ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr); 1843 } else if (type == NORM_1) { /* max column norm */ 1844 PetscReal *tmp,*tmp2; 1845 PetscInt *jj,*garray = aij->garray; 1846 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1847 ierr = 
PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1848 *norm = 0.0; 1849 v = amat->a; jj = amat->j; 1850 for (j=0; j<amat->nz; j++) { 1851 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1852 } 1853 v = bmat->a; jj = bmat->j; 1854 for (j=0; j<bmat->nz; j++) { 1855 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1856 } 1857 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1858 for (j=0; j<mat->cmap->N; j++) { 1859 if (tmp2[j] > *norm) *norm = tmp2[j]; 1860 } 1861 ierr = PetscFree(tmp);CHKERRQ(ierr); 1862 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1863 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1864 } else if (type == NORM_INFINITY) { /* max row norm */ 1865 PetscReal ntemp = 0.0; 1866 for (j=0; j<aij->A->rmap->n; j++) { 1867 v = amat->a + amat->i[j]; 1868 sum = 0.0; 1869 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1870 sum += PetscAbsScalar(*v); v++; 1871 } 1872 v = bmat->a + bmat->i[j]; 1873 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1874 sum += PetscAbsScalar(*v); v++; 1875 } 1876 if (sum > ntemp) ntemp = sum; 1877 } 1878 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1879 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1880 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1881 } 1882 PetscFunctionReturn(0); 1883 } 1884 1885 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1886 { 1887 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 1888 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 1889 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 1890 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 1891 PetscErrorCode ierr; 1892 Mat B,A_diag,*B_diag; 1893 const MatScalar *pbv,*bv; 1894 1895 PetscFunctionBegin; 1896 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1897 ai = Aloc->i; aj = Aloc->j; 1898 bi = Bloc->i; bj = Bloc->j; 1899 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1900 PetscInt *d_nnz,*g_nnz,*o_nnz; 1901 PetscSFNode *oloc; 1902 PETSC_UNUSED PetscSF sf; 1903 1904 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1905 /* compute d_nnz for preallocation */ 1906 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 1907 for (i=0; i<ai[ma]; i++) { 1908 d_nnz[aj[i]]++; 1909 } 1910 /* compute local off-diagonal contributions */ 1911 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 1912 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1913 /* map those to global */ 1914 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1915 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1916 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1917 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 1918 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1919 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1920 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1921 1922 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1923 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1924 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1925 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1926 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1927 ierr = 
PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1928 } else { 1929 B = *matout; 1930 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1931 } 1932 1933 b = (Mat_MPIAIJ*)B->data; 1934 A_diag = a->A; 1935 B_diag = &b->A; 1936 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 1937 A_diag_ncol = A_diag->cmap->N; 1938 B_diag_ilen = sub_B_diag->ilen; 1939 B_diag_i = sub_B_diag->i; 1940 1941 /* Set ilen for diagonal of B */ 1942 for (i=0; i<A_diag_ncol; i++) { 1943 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 1944 } 1945 1946 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 1947 very quickly (=without using MatSetValues), because all writes are local. */ 1948 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 1949 1950 /* copy over the B part */ 1951 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 1952 ierr = MatSeqAIJGetArrayRead(a->B,&bv);CHKERRQ(ierr); 1953 pbv = bv; 1954 row = A->rmap->rstart; 1955 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1956 cols_tmp = cols; 1957 for (i=0; i<mb; i++) { 1958 ncol = bi[i+1]-bi[i]; 1959 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES);CHKERRQ(ierr); 1960 row++; 1961 pbv += ncol; cols_tmp += ncol; 1962 } 1963 ierr = PetscFree(cols);CHKERRQ(ierr); 1964 ierr = MatSeqAIJRestoreArrayRead(a->B,&bv);CHKERRQ(ierr); 1965 1966 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1967 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1968 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1969 *matout = B; 1970 } else { 1971 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 1972 } 1973 PetscFunctionReturn(0); 1974 } 1975 1976 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1977 { 1978 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1979 Mat a = aij->A,b = aij->B; 1980 PetscErrorCode ierr; 1981 PetscInt s1,s2,s3; 1982 1983 PetscFunctionBegin; 1984 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1985 if (rr) { 1986 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1987 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1988 /* Overlap communication with computation. 
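      The scatter of rr into the ghost vector lvec is started here; the left scaling of the
      off-diagonal block and the scaling of the diagonal block proceed while the scatter is in
      flight, and it is completed below just before the off-diagonal block is right-scaled with
      the ghosted values.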
*/ 1989 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1990 } 1991 if (ll) { 1992 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1993 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1994 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1995 } 1996 /* scale the diagonal block */ 1997 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1998 1999 if (rr) { 2000 /* Do a scatter end and then right scale the off-diagonal block */ 2001 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2002 ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr); 2003 } 2004 PetscFunctionReturn(0); 2005 } 2006 2007 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2008 { 2009 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2010 PetscErrorCode ierr; 2011 2012 PetscFunctionBegin; 2013 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2014 PetscFunctionReturn(0); 2015 } 2016 2017 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2018 { 2019 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2020 Mat a,b,c,d; 2021 PetscBool flg; 2022 PetscErrorCode ierr; 2023 2024 PetscFunctionBegin; 2025 a = matA->A; b = matA->B; 2026 c = matB->A; d = matB->B; 2027 2028 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2029 if (flg) { 2030 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2031 } 2032 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 2033 PetscFunctionReturn(0); 2034 } 2035 2036 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2037 { 2038 PetscErrorCode ierr; 2039 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2040 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2041 2042 PetscFunctionBegin; 2043 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2044 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2045 /* because of the column compression in the off-processor part of the matrix a->B, 2046 the number of columns in a->B and b->B may be different, hence we cannot call 2047 the MatCopy() directly on the two parts. If need be, we can provide a more 2048 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2049 then copying the submatrices */ 2050 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2051 } else { 2052 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2053 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2054 } 2055 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2056 PetscFunctionReturn(0); 2057 } 2058 2059 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2060 { 2061 PetscErrorCode ierr; 2062 2063 PetscFunctionBegin; 2064 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr); 2065 PetscFunctionReturn(0); 2066 } 2067 2068 /* 2069 Computes the number of nonzeros per row needed for preallocation when X and Y 2070 have different nonzero structure. 
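   For example, if row i of X has (global) columns {1,4,7} and row i of Y has {2,4,9}, the merge
   below counts the union {1,2,4,7,9} and sets nnz[i] = 5; a column present in both matrices
   (here 4) is counted only once.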
2071 */ 2072 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2073 { 2074 PetscInt i,j,k,nzx,nzy; 2075 2076 PetscFunctionBegin; 2077 /* Set the number of nonzeros in the new matrix */ 2078 for (i=0; i<m; i++) { 2079 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2080 nzx = xi[i+1] - xi[i]; 2081 nzy = yi[i+1] - yi[i]; 2082 nnz[i] = 0; 2083 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2084 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2085 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2086 nnz[i]++; 2087 } 2088 for (; k<nzy; k++) nnz[i]++; 2089 } 2090 PetscFunctionReturn(0); 2091 } 2092 2093 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2094 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2095 { 2096 PetscErrorCode ierr; 2097 PetscInt m = Y->rmap->N; 2098 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2099 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2100 2101 PetscFunctionBegin; 2102 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2103 PetscFunctionReturn(0); 2104 } 2105 2106 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2107 { 2108 PetscErrorCode ierr; 2109 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2110 2111 PetscFunctionBegin; 2112 if (str == SAME_NONZERO_PATTERN) { 2113 ierr = MatAXPY(yy->A,a,xx->A,str);CHKERRQ(ierr); 2114 ierr = MatAXPY(yy->B,a,xx->B,str);CHKERRQ(ierr); 2115 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2116 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2117 } else { 2118 Mat B; 2119 PetscInt *nnz_d,*nnz_o; 2120 2121 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2122 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2123 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2124 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2125 ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr); 2126 ierr = MatSetType(B,((PetscObject)Y)->type_name);CHKERRQ(ierr); 2127 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2128 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2129 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2130 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2131 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2132 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2133 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2134 } 2135 PetscFunctionReturn(0); 2136 } 2137 2138 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2139 2140 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2141 { 2142 #if defined(PETSC_USE_COMPLEX) 2143 PetscErrorCode ierr; 2144 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2145 2146 PetscFunctionBegin; 2147 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2148 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2149 #else 2150 PetscFunctionBegin; 2151 #endif 2152 PetscFunctionReturn(0); 2153 } 2154 2155 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2156 { 2157 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2158 PetscErrorCode ierr; 2159 2160 PetscFunctionBegin; 2161 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2162 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2163 
PetscFunctionReturn(0); 2164 } 2165 2166 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2167 { 2168 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2169 PetscErrorCode ierr; 2170 2171 PetscFunctionBegin; 2172 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2173 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2174 PetscFunctionReturn(0); 2175 } 2176 2177 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2178 { 2179 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2180 PetscErrorCode ierr; 2181 PetscInt i,*idxb = NULL,m = A->rmap->n; 2182 PetscScalar *va,*vv; 2183 Vec vB,vA; 2184 const PetscScalar *vb; 2185 2186 PetscFunctionBegin; 2187 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr); 2188 ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr); 2189 2190 ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr); 2191 if (idx) { 2192 for (i=0; i<m; i++) { 2193 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2194 } 2195 } 2196 2197 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr); 2198 ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr); 2199 ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr); 2200 2201 ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr); 2202 ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr); 2203 for (i=0; i<m; i++) { 2204 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2205 vv[i] = vb[i]; 2206 if (idx) idx[i] = a->garray[idxb[i]]; 2207 } else { 2208 vv[i] = va[i]; 2209 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) 2210 idx[i] = a->garray[idxb[i]]; 2211 } 2212 } 2213 ierr = VecRestoreArrayWrite(vA,&vv);CHKERRQ(ierr); 2214 ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr); 2215 ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr); 2216 ierr = PetscFree(idxb);CHKERRQ(ierr); 2217 ierr = VecDestroy(&vA);CHKERRQ(ierr); 2218 ierr = VecDestroy(&vB);CHKERRQ(ierr); 2219 PetscFunctionReturn(0); 2220 } 2221 2222 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2223 { 2224 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2225 PetscInt m = A->rmap->n,n = A->cmap->n; 2226 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2227 PetscInt *cmap = mat->garray; 2228 PetscInt *diagIdx, *offdiagIdx; 2229 Vec diagV, offdiagV; 2230 PetscScalar *a, *diagA, *offdiagA; 2231 const PetscScalar *ba,*bav; 2232 PetscInt r,j,col,ncols,*bi,*bj; 2233 PetscErrorCode ierr; 2234 Mat B = mat->B; 2235 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2236 2237 PetscFunctionBegin; 2238 /* When a process holds entire A and other processes have no entry */ 2239 if (A->cmap->N == n) { 2240 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2241 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2242 ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr); 2243 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2244 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2245 PetscFunctionReturn(0); 2246 } else if (n == 0) { 2247 if (m) { 2248 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2249 for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;} 2250 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2251 } 2252 PetscFunctionReturn(0); 2253 } 2254 2255 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2256 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2257 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2258 ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2259 2260 /* Get offdiagIdx[] for implicit 0.0 */ 2261 ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr); 2262 ba = bav; 2263 bi = b->i; 2264 bj = b->j; 
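  /*
     Background for the scan below (illustrative example): the off-diagonal block B stores only
     the columns in which this process actually has nonzeros, and cmap[] (= garray) maps those
     compressed column indices back to global column numbers.  Any global column outside the
     owned range [cstart,cend) that does not appear in a row of B is an implicit 0.0, and since
     |0.0| is the smallest possible magnitude, the first such "hole" is a candidate for the row
     minimum.  For instance, if the owned columns are 3..4 and a row of B has entries only in
     global columns 0 and 7, then columns 1, 2, 5 and 6 are implicit zeros and the scan records
     the first of them.
  */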
2265 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2266 for (r = 0; r < m; r++) { 2267 ncols = bi[r+1] - bi[r]; 2268 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2269 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2270 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2271 offdiagA[r] = 0.0; 2272 2273 /* Find first hole in the cmap */ 2274 for (j=0; j<ncols; j++) { 2275 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2276 if (col > j && j < cstart) { 2277 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2278 break; 2279 } else if (col > j + n && j >= cstart) { 2280 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2281 break; 2282 } 2283 } 2284 if (j == ncols && ncols < A->cmap->N - n) { 2285 /* a hole is outside compressed Bcols */ 2286 if (ncols == 0) { 2287 if (cstart) { 2288 offdiagIdx[r] = 0; 2289 } else offdiagIdx[r] = cend; 2290 } else { /* ncols > 0 */ 2291 offdiagIdx[r] = cmap[ncols-1] + 1; 2292 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2293 } 2294 } 2295 } 2296 2297 for (j=0; j<ncols; j++) { 2298 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2299 ba++; bj++; 2300 } 2301 } 2302 2303 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2304 ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2305 for (r = 0; r < m; ++r) { 2306 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2307 a[r] = diagA[r]; 2308 if (idx) idx[r] = cstart + diagIdx[r]; 2309 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2310 a[r] = diagA[r]; 2311 if (idx) { 2312 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2313 idx[r] = cstart + diagIdx[r]; 2314 } else idx[r] = offdiagIdx[r]; 2315 } 2316 } else { 2317 a[r] = offdiagA[r]; 2318 if (idx) idx[r] = offdiagIdx[r]; 2319 } 2320 } 2321 ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr); 2322 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2323 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2324 ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2325 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2326 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2327 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2328 PetscFunctionReturn(0); 2329 } 2330 2331 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2332 { 2333 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2334 PetscInt m = A->rmap->n,n = A->cmap->n; 2335 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2336 PetscInt *cmap = mat->garray; 2337 PetscInt *diagIdx, *offdiagIdx; 2338 Vec diagV, offdiagV; 2339 PetscScalar *a, *diagA, *offdiagA; 2340 const PetscScalar *ba,*bav; 2341 PetscInt r,j,col,ncols,*bi,*bj; 2342 PetscErrorCode ierr; 2343 Mat B = mat->B; 2344 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2345 2346 PetscFunctionBegin; 2347 /* When a process holds entire A and other processes have no entry */ 2348 if (A->cmap->N == n) { 2349 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2350 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2351 ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr); 2352 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2353 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2354 PetscFunctionReturn(0); 2355 } else if (n == 0) { 2356 if (m) { 2357 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2358 for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;} 2359 ierr = 
VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2360 } 2361 PetscFunctionReturn(0); 2362 } 2363 2364 ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2365 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2366 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2367 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2368 2369 /* Get offdiagIdx[] for implicit 0.0 */ 2370 ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr); 2371 ba = bav; 2372 bi = b->i; 2373 bj = b->j; 2374 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2375 for (r = 0; r < m; r++) { 2376 ncols = bi[r+1] - bi[r]; 2377 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2378 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2379 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2380 offdiagA[r] = 0.0; 2381 2382 /* Find first hole in the cmap */ 2383 for (j=0; j<ncols; j++) { 2384 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2385 if (col > j && j < cstart) { 2386 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2387 break; 2388 } else if (col > j + n && j >= cstart) { 2389 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2390 break; 2391 } 2392 } 2393 if (j == ncols && ncols < A->cmap->N - n) { 2394 /* a hole is outside compressed Bcols */ 2395 if (ncols == 0) { 2396 if (cstart) { 2397 offdiagIdx[r] = 0; 2398 } else offdiagIdx[r] = cend; 2399 } else { /* ncols > 0 */ 2400 offdiagIdx[r] = cmap[ncols-1] + 1; 2401 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2402 } 2403 } 2404 } 2405 2406 for (j=0; j<ncols; j++) { 2407 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2408 ba++; bj++; 2409 } 2410 } 2411 2412 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2413 ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2414 for (r = 0; r < m; ++r) { 2415 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2416 a[r] = diagA[r]; 2417 if (idx) idx[r] = cstart + diagIdx[r]; 2418 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2419 a[r] = diagA[r]; 2420 if (idx) { 2421 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2422 idx[r] = cstart + diagIdx[r]; 2423 } else idx[r] = offdiagIdx[r]; 2424 } 2425 } else { 2426 a[r] = offdiagA[r]; 2427 if (idx) idx[r] = offdiagIdx[r]; 2428 } 2429 } 2430 ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr); 2431 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2432 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2433 ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2434 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2435 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2436 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2437 PetscFunctionReturn(0); 2438 } 2439 2440 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2441 { 2442 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 2443 PetscInt m = A->rmap->n,n = A->cmap->n; 2444 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2445 PetscInt *cmap = mat->garray; 2446 PetscInt *diagIdx, *offdiagIdx; 2447 Vec diagV, offdiagV; 2448 PetscScalar *a, *diagA, *offdiagA; 2449 const PetscScalar *ba,*bav; 2450 PetscInt r,j,col,ncols,*bi,*bj; 2451 PetscErrorCode ierr; 2452 Mat B = mat->B; 2453 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2454 2455 PetscFunctionBegin; 2456 /* When a process holds entire A and other processes have no entry */ 2457 if (A->cmap->N == n) { 2458 ierr = 
VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2459 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2460 ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr); 2461 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2462 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2463 PetscFunctionReturn(0); 2464 } else if (n == 0) { 2465 if (m) { 2466 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2467 for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;} 2468 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2469 } 2470 PetscFunctionReturn(0); 2471 } 2472 2473 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2474 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2475 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2476 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2477 2478 /* Get offdiagIdx[] for implicit 0.0 */ 2479 ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr); 2480 ba = bav; 2481 bi = b->i; 2482 bj = b->j; 2483 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2484 for (r = 0; r < m; r++) { 2485 ncols = bi[r+1] - bi[r]; 2486 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2487 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2488 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2489 offdiagA[r] = 0.0; 2490 2491 /* Find first hole in the cmap */ 2492 for (j=0; j<ncols; j++) { 2493 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2494 if (col > j && j < cstart) { 2495 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2496 break; 2497 } else if (col > j + n && j >= cstart) { 2498 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2499 break; 2500 } 2501 } 2502 if (j == ncols && ncols < A->cmap->N - n) { 2503 /* a hole is outside compressed Bcols */ 2504 if (ncols == 0) { 2505 if (cstart) { 2506 offdiagIdx[r] = 0; 2507 } else offdiagIdx[r] = cend; 2508 } else { /* ncols > 0 */ 2509 offdiagIdx[r] = cmap[ncols-1] + 1; 2510 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2511 } 2512 } 2513 } 2514 2515 for (j=0; j<ncols; j++) { 2516 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2517 ba++; bj++; 2518 } 2519 } 2520 2521 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2522 ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr); 2523 for (r = 0; r < m; ++r) { 2524 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2525 a[r] = diagA[r]; 2526 if (idx) idx[r] = cstart + diagIdx[r]; 2527 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2528 a[r] = diagA[r]; 2529 if (idx) { 2530 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2531 idx[r] = cstart + diagIdx[r]; 2532 } else idx[r] = offdiagIdx[r]; 2533 } 2534 } else { 2535 a[r] = offdiagA[r]; 2536 if (idx) idx[r] = offdiagIdx[r]; 2537 } 2538 } 2539 ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr); 2540 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2541 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2542 ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr); 2543 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2544 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2545 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2546 PetscFunctionReturn(0); 2547 } 2548 2549 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2550 { 2551 PetscErrorCode ierr; 2552 Mat *dummy; 2553 2554 PetscFunctionBegin; 2555 ierr = 
MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2556 *newmat = *dummy; 2557 ierr = PetscFree(dummy);CHKERRQ(ierr); 2558 PetscFunctionReturn(0); 2559 } 2560 2561 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2562 { 2563 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2564 PetscErrorCode ierr; 2565 2566 PetscFunctionBegin; 2567 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2568 A->factorerrortype = a->A->factorerrortype; 2569 PetscFunctionReturn(0); 2570 } 2571 2572 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2573 { 2574 PetscErrorCode ierr; 2575 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2576 2577 PetscFunctionBegin; 2578 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2579 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2580 if (x->assembled) { 2581 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2582 } else { 2583 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2584 } 2585 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2586 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2587 PetscFunctionReturn(0); 2588 } 2589 2590 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2591 { 2592 PetscFunctionBegin; 2593 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2594 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2595 PetscFunctionReturn(0); 2596 } 2597 2598 /*@ 2599 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2600 2601 Collective on Mat 2602 2603 Input Parameters: 2604 + A - the matrix 2605 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2606 2607 Level: advanced 2608 2609 @*/ 2610 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2611 { 2612 PetscErrorCode ierr; 2613 2614 PetscFunctionBegin; 2615 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2616 PetscFunctionReturn(0); 2617 } 2618 2619 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2620 { 2621 PetscErrorCode ierr; 2622 PetscBool sc = PETSC_FALSE,flg; 2623 2624 PetscFunctionBegin; 2625 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2626 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2627 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2628 if (flg) { 2629 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2630 } 2631 ierr = PetscOptionsTail();CHKERRQ(ierr); 2632 PetscFunctionReturn(0); 2633 } 2634 2635 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2636 { 2637 PetscErrorCode ierr; 2638 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2639 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2640 2641 PetscFunctionBegin; 2642 if (!Y->preallocated) { 2643 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2644 } else if (!aij->nz) { 2645 PetscInt nonew = aij->nonew; 2646 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2647 aij->nonew = nonew; 2648 } 2649 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2650 
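  /*
     Note on the logic above: MatShift() adds a*I, so every local row of the diagonal block needs
     room for at least one entry.  If Y has never been preallocated, a single diagonal entry per
     row (and no off-diagonal entries) is reserved; if it was preallocated but its diagonal block
     currently holds no nonzeros, that block is re-preallocated with one slot per row while the
     user's new-nonzero setting (aij->nonew) is preserved.  The actual insertion of the shift is
     then performed by the generic MatShift_Basic().
  */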
PetscFunctionReturn(0); 2651 } 2652 2653 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2654 { 2655 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2656 PetscErrorCode ierr; 2657 2658 PetscFunctionBegin; 2659 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2660 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2661 if (d) { 2662 PetscInt rstart; 2663 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2664 *d += rstart; 2665 2666 } 2667 PetscFunctionReturn(0); 2668 } 2669 2670 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2671 { 2672 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2673 PetscErrorCode ierr; 2674 2675 PetscFunctionBegin; 2676 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2677 PetscFunctionReturn(0); 2678 } 2679 2680 /* -------------------------------------------------------------------*/ 2681 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2682 MatGetRow_MPIAIJ, 2683 MatRestoreRow_MPIAIJ, 2684 MatMult_MPIAIJ, 2685 /* 4*/ MatMultAdd_MPIAIJ, 2686 MatMultTranspose_MPIAIJ, 2687 MatMultTransposeAdd_MPIAIJ, 2688 NULL, 2689 NULL, 2690 NULL, 2691 /*10*/ NULL, 2692 NULL, 2693 NULL, 2694 MatSOR_MPIAIJ, 2695 MatTranspose_MPIAIJ, 2696 /*15*/ MatGetInfo_MPIAIJ, 2697 MatEqual_MPIAIJ, 2698 MatGetDiagonal_MPIAIJ, 2699 MatDiagonalScale_MPIAIJ, 2700 MatNorm_MPIAIJ, 2701 /*20*/ MatAssemblyBegin_MPIAIJ, 2702 MatAssemblyEnd_MPIAIJ, 2703 MatSetOption_MPIAIJ, 2704 MatZeroEntries_MPIAIJ, 2705 /*24*/ MatZeroRows_MPIAIJ, 2706 NULL, 2707 NULL, 2708 NULL, 2709 NULL, 2710 /*29*/ MatSetUp_MPIAIJ, 2711 NULL, 2712 NULL, 2713 MatGetDiagonalBlock_MPIAIJ, 2714 NULL, 2715 /*34*/ MatDuplicate_MPIAIJ, 2716 NULL, 2717 NULL, 2718 NULL, 2719 NULL, 2720 /*39*/ MatAXPY_MPIAIJ, 2721 MatCreateSubMatrices_MPIAIJ, 2722 MatIncreaseOverlap_MPIAIJ, 2723 MatGetValues_MPIAIJ, 2724 MatCopy_MPIAIJ, 2725 /*44*/ MatGetRowMax_MPIAIJ, 2726 MatScale_MPIAIJ, 2727 MatShift_MPIAIJ, 2728 MatDiagonalSet_MPIAIJ, 2729 MatZeroRowsColumns_MPIAIJ, 2730 /*49*/ MatSetRandom_MPIAIJ, 2731 NULL, 2732 NULL, 2733 NULL, 2734 NULL, 2735 /*54*/ MatFDColoringCreate_MPIXAIJ, 2736 NULL, 2737 MatSetUnfactored_MPIAIJ, 2738 MatPermute_MPIAIJ, 2739 NULL, 2740 /*59*/ MatCreateSubMatrix_MPIAIJ, 2741 MatDestroy_MPIAIJ, 2742 MatView_MPIAIJ, 2743 NULL, 2744 NULL, 2745 /*64*/ NULL, 2746 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2747 NULL, 2748 NULL, 2749 NULL, 2750 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2751 MatGetRowMinAbs_MPIAIJ, 2752 NULL, 2753 NULL, 2754 NULL, 2755 NULL, 2756 /*75*/ MatFDColoringApply_AIJ, 2757 MatSetFromOptions_MPIAIJ, 2758 NULL, 2759 NULL, 2760 MatFindZeroDiagonals_MPIAIJ, 2761 /*80*/ NULL, 2762 NULL, 2763 NULL, 2764 /*83*/ MatLoad_MPIAIJ, 2765 MatIsSymmetric_MPIAIJ, 2766 NULL, 2767 NULL, 2768 NULL, 2769 NULL, 2770 /*89*/ NULL, 2771 NULL, 2772 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2773 NULL, 2774 NULL, 2775 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2776 NULL, 2777 NULL, 2778 NULL, 2779 MatBindToCPU_MPIAIJ, 2780 /*99*/ MatProductSetFromOptions_MPIAIJ, 2781 NULL, 2782 NULL, 2783 MatConjugate_MPIAIJ, 2784 NULL, 2785 /*104*/MatSetValuesRow_MPIAIJ, 2786 MatRealPart_MPIAIJ, 2787 MatImaginaryPart_MPIAIJ, 2788 NULL, 2789 NULL, 2790 /*109*/NULL, 2791 NULL, 2792 MatGetRowMin_MPIAIJ, 2793 NULL, 2794 MatMissingDiagonal_MPIAIJ, 2795 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2796 NULL, 2797 MatGetGhosts_MPIAIJ, 2798 NULL, 2799 NULL, 2800 /*119*/MatMultDiagonalBlock_MPIAIJ, 2801 
NULL, 2802 NULL, 2803 NULL, 2804 MatGetMultiProcBlock_MPIAIJ, 2805 /*124*/MatFindNonzeroRows_MPIAIJ, 2806 MatGetColumnReductions_MPIAIJ, 2807 MatInvertBlockDiagonal_MPIAIJ, 2808 MatInvertVariableBlockDiagonal_MPIAIJ, 2809 MatCreateSubMatricesMPI_MPIAIJ, 2810 /*129*/NULL, 2811 NULL, 2812 NULL, 2813 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2814 NULL, 2815 /*134*/NULL, 2816 NULL, 2817 NULL, 2818 NULL, 2819 NULL, 2820 /*139*/MatSetBlockSizes_MPIAIJ, 2821 NULL, 2822 NULL, 2823 MatFDColoringSetUp_MPIXAIJ, 2824 MatFindOffBlockDiagonalEntries_MPIAIJ, 2825 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2826 /*145*/NULL, 2827 NULL, 2828 NULL 2829 }; 2830 2831 /* ----------------------------------------------------------------------------------------*/ 2832 2833 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2834 { 2835 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2836 PetscErrorCode ierr; 2837 2838 PetscFunctionBegin; 2839 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2840 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2841 PetscFunctionReturn(0); 2842 } 2843 2844 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2845 { 2846 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2847 PetscErrorCode ierr; 2848 2849 PetscFunctionBegin; 2850 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2851 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2852 PetscFunctionReturn(0); 2853 } 2854 2855 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2856 { 2857 Mat_MPIAIJ *b; 2858 PetscErrorCode ierr; 2859 PetscMPIInt size; 2860 2861 PetscFunctionBegin; 2862 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2863 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2864 b = (Mat_MPIAIJ*)B->data; 2865 2866 #if defined(PETSC_USE_CTABLE) 2867 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2868 #else 2869 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2870 #endif 2871 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2872 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2873 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2874 2875 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2876 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 2877 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2878 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2879 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 2880 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2881 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2882 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2883 2884 if (!B->preallocated) { 2885 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2886 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2887 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2888 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2889 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2890 } 2891 2892 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2893 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2894 B->preallocated = PETSC_TRUE; 2895 B->was_assembled = PETSC_FALSE; 2896 B->assembled = PETSC_FALSE; 2897 PetscFunctionReturn(0); 2898 } 2899 2900 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2901 { 2902 Mat_MPIAIJ *b; 2903 PetscErrorCode ierr; 2904 2905 PetscFunctionBegin; 2906 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2907 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2908 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2909 b = (Mat_MPIAIJ*)B->data; 2910 2911 #if defined(PETSC_USE_CTABLE) 2912 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2913 #else 2914 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2915 #endif 2916 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2917 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2918 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2919 2920 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2921 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2922 B->preallocated = PETSC_TRUE; 2923 B->was_assembled = PETSC_FALSE; 2924 B->assembled = PETSC_FALSE; 2925 PetscFunctionReturn(0); 2926 } 2927 2928 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2929 { 2930 Mat mat; 2931 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2932 PetscErrorCode ierr; 2933 2934 PetscFunctionBegin; 2935 *newmat = NULL; 2936 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2937 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2938 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2939 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2940 a = (Mat_MPIAIJ*)mat->data; 2941 2942 mat->factortype = matin->factortype; 2943 mat->assembled = matin->assembled; 2944 mat->insertmode = NOT_SET_VALUES; 2945 mat->preallocated = matin->preallocated; 2946 2947 a->size = oldmat->size; 2948 a->rank = oldmat->rank; 2949 a->donotstash = oldmat->donotstash; 2950 a->roworiented = oldmat->roworiented; 2951 a->rowindices = NULL; 2952 a->rowvalues = NULL; 2953 a->getrowactive = PETSC_FALSE; 2954 2955 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2956 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2957 2958 if (oldmat->colmap) { 2959 #if defined(PETSC_USE_CTABLE) 2960 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2961 #else 2962 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2963 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2964 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 2965 #endif 2966 } else a->colmap = NULL; 2967 if (oldmat->garray) { 2968 PetscInt len; 2969 len = oldmat->B->cmap->n; 2970 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2971 
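  /* garray[] maps the compressed (local) column indices of the off-diagonal block B to global
     column numbers; it is copied verbatim below so that the duplicate's B block uses the same
     column compression as the original. */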
ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2972 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 2973 } else a->garray = NULL; 2974 2975 /* It may happen MatDuplicate is called with a non-assembled matrix 2976 In fact, MatDuplicate only requires the matrix to be preallocated 2977 This may happen inside a DMCreateMatrix_Shell */ 2978 if (oldmat->lvec) { 2979 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2980 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2981 } 2982 if (oldmat->Mvctx) { 2983 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2984 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2985 } 2986 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2987 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2988 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2989 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2990 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2991 *newmat = mat; 2992 PetscFunctionReturn(0); 2993 } 2994 2995 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2996 { 2997 PetscBool isbinary, ishdf5; 2998 PetscErrorCode ierr; 2999 3000 PetscFunctionBegin; 3001 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 3002 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 3003 /* force binary viewer to load .info file if it has not yet done so */ 3004 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3005 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 3006 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 3007 if (isbinary) { 3008 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 3009 } else if (ishdf5) { 3010 #if defined(PETSC_HAVE_HDF5) 3011 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 3012 #else 3013 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3014 #endif 3015 } else { 3016 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 3017 } 3018 PetscFunctionReturn(0); 3019 } 3020 3021 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3022 { 3023 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 3024 PetscInt *rowidxs,*colidxs; 3025 PetscScalar *matvals; 3026 PetscErrorCode ierr; 3027 3028 PetscFunctionBegin; 3029 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3030 3031 /* read in matrix header */ 3032 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3033 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3034 M = header[1]; N = header[2]; nz = header[3]; 3035 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 3036 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N); 3037 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 3038 3039 /* set block sizes from the viewer's .info file */ 3040 ierr = 
MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3041 /* set global sizes if not set already */ 3042 if (mat->rmap->N < 0) mat->rmap->N = M; 3043 if (mat->cmap->N < 0) mat->cmap->N = N; 3044 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3045 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3046 3047 /* check if the matrix sizes are correct */ 3048 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3049 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 3050 3051 /* read in row lengths and build row indices */ 3052 ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr); 3053 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3054 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 3055 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3056 ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr); 3057 if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 3058 /* read in column indices and matrix values */ 3059 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 3060 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 3061 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 3062 /* store matrix indices and values */ 3063 ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr); 3064 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3065 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3066 PetscFunctionReturn(0); 3067 } 3068 3069 /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 3070 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3071 { 3072 PetscErrorCode ierr; 3073 IS iscol_local; 3074 PetscBool isstride; 3075 PetscMPIInt lisstride=0,gisstride; 3076 3077 PetscFunctionBegin; 3078 /* check if we are grabbing all columns*/ 3079 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3080 3081 if (isstride) { 3082 PetscInt start,len,mstart,mlen; 3083 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3084 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3085 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3086 if (mstart == start && mlen-mstart == len) lisstride = 1; 3087 } 3088 3089 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 3090 if (gisstride) { 3091 PetscInt N; 3092 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3093 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr); 3094 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3095 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3096 } else { 3097 PetscInt cbs; 3098 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3099 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3100 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3101 } 3102 3103 *isseq = iscol_local; 3104 PetscFunctionReturn(0); 3105 } 3106 3107 /* 3108 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3109 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3110 3111 Input Parameters: 3112 mat - matrix 3113 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3114 i.e., mat->rstart <= isrow[i] < mat->rend 3115 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3116 i.e., mat->cstart <= iscol[i] < mat->cend 3117 Output Parameter: 3118 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3119 iscol_o - sequential column index set for retrieving mat->B 3120 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3121 */ 3122 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3123 { 3124 PetscErrorCode ierr; 3125 Vec x,cmap; 3126 const PetscInt *is_idx; 3127 PetscScalar *xarray,*cmaparray; 3128 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3129 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3130 Mat B=a->B; 3131 Vec lvec=a->lvec,lcmap; 3132 PetscInt i,cstart,cend,Bn=B->cmap->N; 3133 MPI_Comm comm; 3134 VecScatter Mvctx=a->Mvctx; 3135 3136 PetscFunctionBegin; 3137 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3138 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3139 3140 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3141 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3142 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3143 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3144 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3145 3146 /* Get start indices */ 3147 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3148 isstart -= ncols; 3149 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3150 3151 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3152 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3153 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3154 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3155 for (i=0; i<ncols; i++) { 3156 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3157 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3158 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3159 } 3160 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3161 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3162 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3163 3164 /* Get iscol_d */ 3165 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3166 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3167 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3168 3169 /* Get isrow_d */ 3170 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3171 rstart = mat->rmap->rstart; 3172 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3173 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3174 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3175 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3176 3177 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3178 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3179 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3180 3181 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3182 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3183 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3184 3185 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3186 3187 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3188 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3189 3190 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3191 /* off-process column indices */ 3192 count = 0; 3193 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3194 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3195 3196 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3197 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3198 for (i=0; i<Bn; i++) { 3199 if (PetscRealPart(xarray[i]) > -1.0) { 3200 idx[count] = i; /* local column index in off-diagonal part B */ 3201 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3202 count++; 3203 } 3204 } 3205 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3206 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3207 3208 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3209 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3210 3211 ierr = PetscFree(idx);CHKERRQ(ierr); 3212 *garray = cmap1; 3213 3214 ierr = VecDestroy(&x);CHKERRQ(ierr); 3215 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3216 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3217 PetscFunctionReturn(0); 3218 } 3219 3220 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3221 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3222 { 3223 PetscErrorCode ierr; 3224 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3225 Mat M = NULL; 3226 MPI_Comm comm; 3227 IS iscol_d,isrow_d,iscol_o; 3228 Mat Asub = NULL,Bsub = NULL; 3229 PetscInt n; 3230 3231 PetscFunctionBegin; 3232 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3233 3234 if (call == MAT_REUSE_MATRIX) { 3235 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3236 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3237 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3238 3239 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3240 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3241 3242 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3243 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3244 3245 /* Update diagonal and off-diagonal portions of submat */ 3246 asub = (Mat_MPIAIJ*)(*submat)->data; 3247 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3248 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3249 if (n) { 3250 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3251 } 3252 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3253 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3254 3255 } else { /* call == MAT_INITIAL_MATRIX) */ 3256 const PetscInt *garray; 3257 PetscInt BsubN; 3258 3259 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3260 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3261 3262 /* Create local submatrices Asub and Bsub */ 3263 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3264 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3265 3266 /* Create submatrix M */ 3267 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3268 3269 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3270 asub = (Mat_MPIAIJ*)M->data; 3271 3272 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3273 n = asub->B->cmap->N; 3274 if (BsubN > n) { 3275 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3276 const PetscInt *idx; 3277 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3278 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3279 3280 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3281 j = 0; 3282 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3283 for (i=0; i<n; i++) { 3284 if (j >= BsubN) break; 3285 while (subgarray[i] > garray[j]) j++; 3286 3287 if (subgarray[i] == garray[j]) { 3288 idx_new[i] = idx[j++]; 3289 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3290 } 3291 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3292 3293 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3294 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3295 3296 } else if (BsubN < n) { 3297 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3298 } 3299 3300 ierr = PetscFree(garray);CHKERRQ(ierr); 3301 *submat = M; 3302 3303 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3304 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3305 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3306 3307 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3308 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3309 3310 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3311 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3312 } 3313 PetscFunctionReturn(0); 3314 } 3315 3316 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3317 { 3318 PetscErrorCode ierr; 3319 IS iscol_local=NULL,isrow_d; 3320 PetscInt csize; 3321 PetscInt n,i,j,start,end; 3322 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3323 MPI_Comm comm; 3324 3325 PetscFunctionBegin; 3326 /* If isrow has same processor distribution as mat, 3327 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3328 if (call == MAT_REUSE_MATRIX) { 3329 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3330 if (isrow_d) { 3331 sameRowDist = PETSC_TRUE; 3332 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3333 } else { 3334 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3335 if (iscol_local) { 3336 sameRowDist = PETSC_TRUE; 3337 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3338 } 3339 } 3340 } else { 3341 /* Check if isrow has same processor distribution as mat */ 3342 sameDist[0] = 
PETSC_FALSE; 3343 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3344 if (!n) { 3345 sameDist[0] = PETSC_TRUE; 3346 } else { 3347 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3348 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3349 if (i >= start && j < end) { 3350 sameDist[0] = PETSC_TRUE; 3351 } 3352 } 3353 3354 /* Check if iscol has same processor distribution as mat */ 3355 sameDist[1] = PETSC_FALSE; 3356 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3357 if (!n) { 3358 sameDist[1] = PETSC_TRUE; 3359 } else { 3360 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3361 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3362 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3363 } 3364 3365 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3366 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRMPI(ierr); 3367 sameRowDist = tsameDist[0]; 3368 } 3369 3370 if (sameRowDist) { 3371 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3372 /* isrow and iscol have same processor distribution as mat */ 3373 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3374 PetscFunctionReturn(0); 3375 } else { /* sameRowDist */ 3376 /* isrow has same processor distribution as mat */ 3377 if (call == MAT_INITIAL_MATRIX) { 3378 PetscBool sorted; 3379 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3380 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3381 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3382 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3383 3384 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3385 if (sorted) { 3386 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3387 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3388 PetscFunctionReturn(0); 3389 } 3390 } else { /* call == MAT_REUSE_MATRIX */ 3391 IS iscol_sub; 3392 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3393 if (iscol_sub) { 3394 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3395 PetscFunctionReturn(0); 3396 } 3397 } 3398 } 3399 } 3400 3401 /* General case: iscol -> iscol_local which has global size of iscol */ 3402 if (call == MAT_REUSE_MATRIX) { 3403 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3404 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3405 } else { 3406 if (!iscol_local) { 3407 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3408 } 3409 } 3410 3411 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3412 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3413 3414 if (call == MAT_INITIAL_MATRIX) { 3415 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3416 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3417 } 3418 PetscFunctionReturn(0); 3419 } 3420 3421 /*@C 3422 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3423 and "off-diagonal" part of the matrix in CSR format. 3424 3425 Collective 3426 3427 Input Parameters: 3428 + comm - MPI communicator 3429 . 
A - "diagonal" portion of matrix 3430 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3431 - garray - global index of B columns 3432 3433 Output Parameter: 3434 . mat - the matrix, with input A as its local diagonal matrix 3435 Level: advanced 3436 3437 Notes: 3438 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3439 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3440 3441 .seealso: MatCreateMPIAIJWithSplitArrays() 3442 @*/ 3443 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3444 { 3445 PetscErrorCode ierr; 3446 Mat_MPIAIJ *maij; 3447 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3448 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3449 const PetscScalar *oa; 3450 Mat Bnew; 3451 PetscInt m,n,N; 3452 3453 PetscFunctionBegin; 3454 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3455 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3456 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3457 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3458 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3459 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3460 3461 /* Get global columns of mat */ 3462 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3463 3464 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3465 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3466 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3467 maij = (Mat_MPIAIJ*)(*mat)->data; 3468 3469 (*mat)->preallocated = PETSC_TRUE; 3470 3471 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3472 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3473 3474 /* Set A as diagonal portion of *mat */ 3475 maij->A = A; 3476 3477 nz = oi[m]; 3478 for (i=0; i<nz; i++) { 3479 col = oj[i]; 3480 oj[i] = garray[col]; 3481 } 3482 3483 /* Set Bnew as off-diagonal portion of *mat */ 3484 ierr = MatSeqAIJGetArrayRead(B,&oa);CHKERRQ(ierr); 3485 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew);CHKERRQ(ierr); 3486 ierr = MatSeqAIJRestoreArrayRead(B,&oa);CHKERRQ(ierr); 3487 bnew = (Mat_SeqAIJ*)Bnew->data; 3488 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3489 maij->B = Bnew; 3490 3491 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3492 3493 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3494 b->free_a = PETSC_FALSE; 3495 b->free_ij = PETSC_FALSE; 3496 ierr = MatDestroy(&B);CHKERRQ(ierr); 3497 3498 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3499 bnew->free_a = PETSC_TRUE; 3500 bnew->free_ij = PETSC_TRUE; 3501 3502 /* condense columns of maij->B */ 3503 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3504 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3505 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3506 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3507 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3508 PetscFunctionReturn(0); 
3509 } 3510 3511 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3512 3513 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3514 { 3515 PetscErrorCode ierr; 3516 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3517 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3518 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3519 Mat M,Msub,B=a->B; 3520 MatScalar *aa; 3521 Mat_SeqAIJ *aij; 3522 PetscInt *garray = a->garray,*colsub,Ncols; 3523 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3524 IS iscol_sub,iscmap; 3525 const PetscInt *is_idx,*cmap; 3526 PetscBool allcolumns=PETSC_FALSE; 3527 MPI_Comm comm; 3528 3529 PetscFunctionBegin; 3530 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3531 if (call == MAT_REUSE_MATRIX) { 3532 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3533 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3534 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3535 3536 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3537 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3538 3539 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3540 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3541 3542 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3543 3544 } else { /* call == MAT_INITIAL_MATRIX) */ 3545 PetscBool flg; 3546 3547 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3548 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3549 3550 /* (1) iscol -> nonscalable iscol_local */ 3551 /* Check for special case: each processor gets entire matrix columns */ 3552 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3553 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3554 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 3555 if (allcolumns) { 3556 iscol_sub = iscol_local; 3557 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3558 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3559 3560 } else { 3561 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3562 PetscInt *idx,*cmap1,k; 3563 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3564 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3565 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3566 count = 0; 3567 k = 0; 3568 for (i=0; i<Ncols; i++) { 3569 j = is_idx[i]; 3570 if (j >= cstart && j < cend) { 3571 /* diagonal part of mat */ 3572 idx[count] = j; 3573 cmap1[count++] = i; /* column index in submat */ 3574 } else if (Bn) { 3575 /* off-diagonal part of mat */ 3576 if (j == garray[k]) { 3577 idx[count] = j; 3578 cmap1[count++] = i; /* column index in submat */ 3579 } else if (j > garray[k]) { 3580 while (j > garray[k] && k < Bn-1) k++; 3581 if (j == garray[k]) { 3582 idx[count] = j; 3583 cmap1[count++] = i; /* column index in submat */ 3584 } 3585 } 3586 } 3587 } 3588 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3589 3590 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3591 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3592 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3593 3594 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3595 } 3596 3597 /* (3) Create sequential Msub */ 3598 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3599 } 3600 3601 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3602 aij = (Mat_SeqAIJ*)(Msub)->data; 3603 ii = aij->i; 3604 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3605 3606 /* 3607 m - number of local rows 3608 Ncols - number of columns (same on all processors) 3609 rstart - first row in new global matrix generated 3610 */ 3611 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3612 3613 if (call == MAT_INITIAL_MATRIX) { 3614 /* (4) Create parallel newmat */ 3615 PetscMPIInt rank,size; 3616 PetscInt csize; 3617 3618 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 3619 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 3620 3621 /* 3622 Determine the number of non-zeros in the diagonal and off-diagonal 3623 portions of the matrix in order to do correct preallocation 3624 */ 3625 3626 /* first get start and end of "diagonal" columns */ 3627 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3628 if (csize == PETSC_DECIDE) { 3629 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3630 if (mglobal == Ncols) { /* square matrix */ 3631 nlocal = m; 3632 } else { 3633 nlocal = Ncols/size + ((Ncols % size) > rank); 3634 } 3635 } else { 3636 nlocal = csize; 3637 } 3638 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3639 rstart = rend - nlocal; 3640 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3641 3642 /* next, compute all the lengths */ 3643 jj = aij->j; 3644 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3645 olens = dlens + m; 3646 for (i=0; i<m; i++) { 3647 jend = ii[i+1] - ii[i]; 3648 olen = 0; 3649 dlen = 0; 3650 for (j=0; j<jend; j++) { 3651 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3652 else dlen++; 3653 jj++; 3654 } 3655 olens[i] = olen; 3656 dlens[i] = dlen; 3657 } 3658 3659 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3660 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3661 3662 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3663 ierr = 
MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 3664 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3665 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3666 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3667 ierr = PetscFree(dlens);CHKERRQ(ierr); 3668 3669 } else { /* call == MAT_REUSE_MATRIX */ 3670 M = *newmat; 3671 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3672 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3673 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3674 /* 3675 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3676 rather than the slower MatSetValues(). 3677 */ 3678 M->was_assembled = PETSC_TRUE; 3679 M->assembled = PETSC_FALSE; 3680 } 3681 3682 /* (5) Set values of Msub to *newmat */ 3683 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3684 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3685 3686 jj = aij->j; 3687 ierr = MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr); 3688 for (i=0; i<m; i++) { 3689 row = rstart + i; 3690 nz = ii[i+1] - ii[i]; 3691 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3692 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3693 jj += nz; aa += nz; 3694 } 3695 ierr = MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr); 3696 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3697 3698 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3699 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3700 3701 ierr = PetscFree(colsub);CHKERRQ(ierr); 3702 3703 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3704 if (call == MAT_INITIAL_MATRIX) { 3705 *newmat = M; 3706 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3707 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3708 3709 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3710 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3711 3712 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3713 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3714 3715 if (iscol_local) { 3716 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3717 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3718 } 3719 } 3720 PetscFunctionReturn(0); 3721 } 3722 3723 /* 3724 Not great since it makes two copies of the submatrix, first an SeqAIJ 3725 in local and then by concatenating the local matrices the end result. 3726 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3727 3728 Note: This requires a sequential iscol with all indices. 
3729 */ 3730 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3731 { 3732 PetscErrorCode ierr; 3733 PetscMPIInt rank,size; 3734 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3735 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3736 Mat M,Mreuse; 3737 MatScalar *aa,*vwork; 3738 MPI_Comm comm; 3739 Mat_SeqAIJ *aij; 3740 PetscBool colflag,allcolumns=PETSC_FALSE; 3741 3742 PetscFunctionBegin; 3743 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3744 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 3745 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 3746 3747 /* Check for special case: each processor gets entire matrix columns */ 3748 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3749 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3750 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3751 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 3752 3753 if (call == MAT_REUSE_MATRIX) { 3754 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3755 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3756 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3757 } else { 3758 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3759 } 3760 3761 /* 3762 m - number of local rows 3763 n - number of columns (same on all processors) 3764 rstart - first row in new global matrix generated 3765 */ 3766 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3767 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3768 if (call == MAT_INITIAL_MATRIX) { 3769 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3770 ii = aij->i; 3771 jj = aij->j; 3772 3773 /* 3774 Determine the number of non-zeros in the diagonal and off-diagonal 3775 portions of the matrix in order to do correct preallocation 3776 */ 3777 3778 /* first get start and end of "diagonal" columns */ 3779 if (csize == PETSC_DECIDE) { 3780 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3781 if (mglobal == n) { /* square matrix */ 3782 nlocal = m; 3783 } else { 3784 nlocal = n/size + ((n % size) > rank); 3785 } 3786 } else { 3787 nlocal = csize; 3788 } 3789 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3790 rstart = rend - nlocal; 3791 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3792 3793 /* next, compute all the lengths */ 3794 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3795 olens = dlens + m; 3796 for (i=0; i<m; i++) { 3797 jend = ii[i+1] - ii[i]; 3798 olen = 0; 3799 dlen = 0; 3800 for (j=0; j<jend; j++) { 3801 if (*jj < rstart || *jj >= rend) olen++; 3802 else dlen++; 3803 jj++; 3804 } 3805 olens[i] = olen; 3806 dlens[i] = dlen; 3807 } 3808 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3809 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3810 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3811 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3812 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3813 ierr = PetscFree(dlens);CHKERRQ(ierr); 3814 } else { 3815 PetscInt ml,nl; 3816 3817 M = *newmat; 3818 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3819 
if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3820 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3821 /* 3822 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3823 rather than the slower MatSetValues(). 3824 */ 3825 M->was_assembled = PETSC_TRUE; 3826 M->assembled = PETSC_FALSE; 3827 } 3828 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3829 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3830 ii = aij->i; 3831 jj = aij->j; 3832 3833 /* trigger copy to CPU if needed */ 3834 ierr = MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr); 3835 for (i=0; i<m; i++) { 3836 row = rstart + i; 3837 nz = ii[i+1] - ii[i]; 3838 cwork = jj; jj += nz; 3839 vwork = aa; aa += nz; 3840 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3841 } 3842 ierr = MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr); 3843 3844 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3845 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3846 *newmat = M; 3847 3848 /* save submatrix used in processor for next request */ 3849 if (call == MAT_INITIAL_MATRIX) { 3850 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3851 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3852 } 3853 PetscFunctionReturn(0); 3854 } 3855 3856 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3857 { 3858 PetscInt m,cstart, cend,j,nnz,i,d; 3859 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3860 const PetscInt *JJ; 3861 PetscErrorCode ierr; 3862 PetscBool nooffprocentries; 3863 3864 PetscFunctionBegin; 3865 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3866 3867 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3868 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3869 m = B->rmap->n; 3870 cstart = B->cmap->rstart; 3871 cend = B->cmap->rend; 3872 rstart = B->rmap->rstart; 3873 3874 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3875 3876 if (PetscDefined(USE_DEBUG)) { 3877 for (i=0; i<m; i++) { 3878 nnz = Ii[i+1]- Ii[i]; 3879 JJ = J + Ii[i]; 3880 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3881 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3882 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3883 } 3884 } 3885 3886 for (i=0; i<m; i++) { 3887 nnz = Ii[i+1]- Ii[i]; 3888 JJ = J + Ii[i]; 3889 nnz_max = PetscMax(nnz_max,nnz); 3890 d = 0; 3891 for (j=0; j<nnz; j++) { 3892 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3893 } 3894 d_nnz[i] = d; 3895 o_nnz[i] = nnz - d; 3896 } 3897 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3898 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3899 3900 for (i=0; i<m; i++) { 3901 ii = i + rstart; 3902 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? 
v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 3903 } 3904 nooffprocentries = B->nooffprocentries; 3905 B->nooffprocentries = PETSC_TRUE; 3906 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3907 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3908 B->nooffprocentries = nooffprocentries; 3909 3910 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3911 PetscFunctionReturn(0); 3912 } 3913 3914 /*@ 3915 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3916 (the default parallel PETSc format). 3917 3918 Collective 3919 3920 Input Parameters: 3921 + B - the matrix 3922 . i - the indices into j for the start of each local row (starts with zero) 3923 . j - the column indices for each local row (starts with zero) 3924 - v - optional values in the matrix 3925 3926 Level: developer 3927 3928 Notes: 3929 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3930 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3931 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3932 3933 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3934 3935 The format which is used for the sparse matrix input, is equivalent to a 3936 row-major ordering.. i.e for the following matrix, the input data expected is 3937 as shown 3938 3939 $ 1 0 0 3940 $ 2 0 3 P0 3941 $ ------- 3942 $ 4 5 6 P1 3943 $ 3944 $ Process0 [P0]: rows_owned=[0,1] 3945 $ i = {0,1,3} [size = nrow+1 = 2+1] 3946 $ j = {0,0,2} [size = 3] 3947 $ v = {1,2,3} [size = 3] 3948 $ 3949 $ Process1 [P1]: rows_owned=[2] 3950 $ i = {0,3} [size = nrow+1 = 1+1] 3951 $ j = {0,1,2} [size = 3] 3952 $ v = {4,5,6} [size = 3] 3953 3954 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3955 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3956 @*/ 3957 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3958 { 3959 PetscErrorCode ierr; 3960 3961 PetscFunctionBegin; 3962 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3963 PetscFunctionReturn(0); 3964 } 3965 3966 /*@C 3967 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3968 (the default parallel PETSc format). For good matrix assembly performance 3969 the user should preallocate the matrix storage by setting the parameters 3970 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3971 performance can be increased by more than a factor of 50. 3972 3973 Collective 3974 3975 Input Parameters: 3976 + B - the matrix 3977 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3978 (same value is used for all local rows) 3979 . d_nnz - array containing the number of nonzeros in the various rows of the 3980 DIAGONAL portion of the local submatrix (possibly different for each row) 3981 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3982 The size of this array is equal to the number of local rows, i.e 'm'. 3983 For matrices that will be factored, you must leave room for (and set) 3984 the diagonal entry even if it is zero. 3985 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3986 submatrix (same value is used for all local rows). 3987 - o_nnz - array containing the number of nonzeros in the various rows of the 3988 OFF-DIAGONAL portion of the local submatrix (possibly different for 3989 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3990 structure. The size of this array is equal to the number 3991 of local rows, i.e 'm'. 3992 3993 If the *_nnz parameter is given then the *_nz parameter is ignored 3994 3995 The AIJ format (also called the Yale sparse matrix format or 3996 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3997 storage. The stored row and column indices begin with zero. 3998 See Users-Manual: ch_mat for details. 3999 4000 The parallel matrix is partitioned such that the first m0 rows belong to 4001 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4002 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 4003 4004 The DIAGONAL portion of the local submatrix of a processor can be defined 4005 as the submatrix which is obtained by extraction the part corresponding to 4006 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4007 first row that belongs to the processor, r2 is the last row belonging to 4008 the this processor, and c1-c2 is range of indices of the local part of a 4009 vector suitable for applying the matrix to. This is an mxn matrix. In the 4010 common case of a square matrix, the row and column ranges are the same and 4011 the DIAGONAL part is also square. The remaining portion of the local 4012 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4013 4014 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4015 4016 You can call MatGetInfo() to get information on how effective the preallocation was; 4017 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4018 You can also run with the option -info and look for messages with the string 4019 malloc in them to see if additional memory allocation was needed. 4020 4021 Example usage: 4022 4023 Consider the following 8x8 matrix with 34 non-zero values, that is 4024 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4025 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4026 as follows: 4027 4028 .vb 4029 1 2 0 | 0 3 0 | 0 4 4030 Proc0 0 5 6 | 7 0 0 | 8 0 4031 9 0 10 | 11 0 0 | 12 0 4032 ------------------------------------- 4033 13 0 14 | 15 16 17 | 0 0 4034 Proc1 0 18 0 | 19 20 21 | 0 0 4035 0 0 0 | 22 23 0 | 24 0 4036 ------------------------------------- 4037 Proc2 25 26 27 | 0 0 28 | 29 0 4038 30 0 0 | 31 32 33 | 0 34 4039 .ve 4040 4041 This can be represented as a collection of submatrices as: 4042 4043 .vb 4044 A B C 4045 D E F 4046 G H I 4047 .ve 4048 4049 Where the submatrices A,B,C are owned by proc0, D,E,F are 4050 owned by proc1, G,H,I are owned by proc2. 4051 4052 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4053 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4054 The 'M','N' parameters are 8,8, and have the same values on all procs. 4055 4056 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4057 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4058 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4059 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4060 part as SeqAIJ matrices. 
For example, proc1 will store [E] as one SeqAIJ 4061 matrix and [DF] as another SeqAIJ matrix. 4062 4063 When d_nz, o_nz parameters are specified, d_nz storage elements are 4064 allocated for every row of the local diagonal submatrix, and o_nz 4065 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4066 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 4067 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4068 In this case, the values of d_nz,o_nz are: 4069 .vb 4070 proc0 : d_nz = 2, o_nz = 2 4071 proc1 : d_nz = 3, o_nz = 2 4072 proc2 : d_nz = 1, o_nz = 4 4073 .ve 4074 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4075 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4076 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4077 34 values. 4078 4079 When d_nnz, o_nnz parameters are specified, the storage is specified 4080 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4081 In the above case the values for d_nnz,o_nnz are: 4082 .vb 4083 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4084 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4085 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4086 .ve 4087 Here the space allocated is the sum of all the above values, i.e., 34, and 4088 hence the preallocation is exact. 4089 4090 Level: intermediate 4091 4092 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4093 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 4094 @*/ 4095 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4096 { 4097 PetscErrorCode ierr; 4098 4099 PetscFunctionBegin; 4100 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4101 PetscValidType(B,1); 4102 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4103 PetscFunctionReturn(0); 4104 } 4105 4106 /*@ 4107 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard 4108 CSR format. 4109 4110 Collective 4111 4112 Input Parameters: 4113 + comm - MPI communicator 4114 . m - number of local rows (Cannot be PETSC_DECIDE) 4115 . n - This value should be the same as the local size used in creating the 4116 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4117 calculated if N is given) For square matrices n is almost always m. 4118 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4119 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4120 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4121 . j - column indices 4122 - a - matrix values 4123 4124 Output Parameter: 4125 . mat - the matrix 4126 4127 Level: intermediate 4128 4129 Notes: 4130 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4131 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4132 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4133 4134 The i and j indices are 0 based, and the i indices are offsets into the local j array. 4135 4136 The format used for the sparse matrix input is equivalent to a 4137 row-major ordering,
i.e., for the following matrix, the input data expected is 4138 as shown 4139 4140 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays(). 4141 4142 $ 1 0 0 4143 $ 2 0 3 P0 4144 $ ------- 4145 $ 4 5 6 P1 4146 $ 4147 $ Process0 [P0]: rows_owned=[0,1] 4148 $ i = {0,1,3} [size = nrow+1 = 2+1] 4149 $ j = {0,0,2} [size = 3] 4150 $ v = {1,2,3} [size = 3] 4151 $ 4152 $ Process1 [P1]: rows_owned=[2] 4153 $ i = {0,3} [size = nrow+1 = 1+1] 4154 $ j = {0,1,2} [size = 3] 4155 $ v = {4,5,6} [size = 3] 4156 4157 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4158 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4159 @*/ 4160 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4161 { 4162 PetscErrorCode ierr; 4163 4164 PetscFunctionBegin; 4165 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4166 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4167 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4168 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4169 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4170 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4171 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4172 PetscFunctionReturn(0); 4173 } 4174 4175 /*@ 4176 MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows in standard 4177 CSR format. Only the numerical values are updated; the other arrays must be identical to those used when the matrix was created 4178 4179 Collective 4180 4181 Input Parameters: 4182 + mat - the matrix 4183 . m - number of local rows (Cannot be PETSC_DECIDE) 4184 . n - This value should be the same as the local size used in creating the 4185 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4186 calculated if N is given) For square matrices n is almost always m. 4187 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4188 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4189 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4190 .
J - column indices 4191 - v - matrix values 4192 4193 Level: intermediate 4194 4195 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4196 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4197 @*/ 4198 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4199 { 4200 PetscErrorCode ierr; 4201 PetscInt cstart,nnz,i,j; 4202 PetscInt *ld; 4203 PetscBool nooffprocentries; 4204 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4205 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data; 4206 PetscScalar *ad = Ad->a, *ao = Ao->a; 4207 const PetscInt *Adi = Ad->i; 4208 PetscInt ldi,Iii,md; 4209 4210 PetscFunctionBegin; 4211 if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4212 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4213 if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4214 if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4215 4216 cstart = mat->cmap->rstart; 4217 if (!Aij->ld) { 4218 /* count number of entries below block diagonal */ 4219 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 4220 Aij->ld = ld; 4221 for (i=0; i<m; i++) { 4222 nnz = Ii[i+1]- Ii[i]; 4223 j = 0; 4224 while (J[j] < cstart && j < nnz) {j++;} 4225 J += nnz; 4226 ld[i] = j; 4227 } 4228 } else { 4229 ld = Aij->ld; 4230 } 4231 4232 for (i=0; i<m; i++) { 4233 nnz = Ii[i+1]- Ii[i]; 4234 Iii = Ii[i]; 4235 ldi = ld[i]; 4236 md = Adi[i+1]-Adi[i]; 4237 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr); 4238 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr); 4239 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr); 4240 ad += md; 4241 ao += nnz - md; 4242 } 4243 nooffprocentries = mat->nooffprocentries; 4244 mat->nooffprocentries = PETSC_TRUE; 4245 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr); 4246 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr); 4247 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 4248 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4249 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4250 mat->nooffprocentries = nooffprocentries; 4251 PetscFunctionReturn(0); 4252 } 4253 4254 /*@C 4255 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4256 (the default parallel PETSc format). For good matrix assembly performance 4257 the user should preallocate the matrix storage by setting the parameters 4258 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4259 performance can be increased by more than a factor of 50. 4260 4261 Collective 4262 4263 Input Parameters: 4264 + comm - MPI communicator 4265 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4266 This value should be the same as the local size used in creating the 4267 y vector for the matrix-vector product y = Ax. 4268 . n - This value should be the same as the local size used in creating the 4269 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4270 calculated if N is given) For square matrices n is almost always m. 4271 . 
M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4272 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4273 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4274 (same value is used for all local rows) 4275 . d_nnz - array containing the number of nonzeros in the various rows of the 4276 DIAGONAL portion of the local submatrix (possibly different for each row) 4277 or NULL, if d_nz is used to specify the nonzero structure. 4278 The size of this array is equal to the number of local rows, i.e 'm'. 4279 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4280 submatrix (same value is used for all local rows). 4281 - o_nnz - array containing the number of nonzeros in the various rows of the 4282 OFF-DIAGONAL portion of the local submatrix (possibly different for 4283 each row) or NULL, if o_nz is used to specify the nonzero 4284 structure. The size of this array is equal to the number 4285 of local rows, i.e 'm'. 4286 4287 Output Parameter: 4288 . A - the matrix 4289 4290 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4291 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4292 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4293 4294 Notes: 4295 If the *_nnz parameter is given then the *_nz parameter is ignored 4296 4297 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4298 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4299 storage requirements for this matrix. 4300 4301 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4302 processor than it must be used on all processors that share the object for 4303 that argument. 4304 4305 The user MUST specify either the local or global matrix dimensions 4306 (possibly both). 4307 4308 The parallel matrix is partitioned across processors such that the 4309 first m0 rows belong to process 0, the next m1 rows belong to 4310 process 1, the next m2 rows belong to process 2 etc.. where 4311 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4312 values corresponding to [m x N] submatrix. 4313 4314 The columns are logically partitioned with the n0 columns belonging 4315 to 0th partition, the next n1 columns belonging to the next 4316 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4317 4318 The DIAGONAL portion of the local submatrix on any given processor 4319 is the submatrix corresponding to the rows and columns m,n 4320 corresponding to the given processor. i.e diagonal matrix on 4321 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4322 etc. The remaining portion of the local submatrix [m x (N-n)] 4323 constitute the OFF-DIAGONAL portion. The example below better 4324 illustrates this concept. 4325 4326 For a square global matrix we define each processor's diagonal portion 4327 to be its local rows and the corresponding columns (a square submatrix); 4328 each processor's off-diagonal portion encompasses the remainder of the 4329 local matrix (a rectangular submatrix). 4330 4331 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4332 4333 When calling this routine with a single process communicator, a matrix of 4334 type SEQAIJ is returned. 
If a matrix of type MPIAIJ is desired for this 4335 type of communicator, use the construction mechanism 4336 .vb 4337 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4338 .ve 4339 4345 By default, this format uses inodes (identical nodes) when possible. 4346 We search for consecutive rows with the same nonzero structure, thereby 4347 reusing matrix information to achieve increased efficiency. 4348 4349 Options Database Keys: 4350 + -mat_no_inode - Do not use inodes 4351 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4352 4353 Example usage: 4354 4355 Consider the following 8x8 matrix with 34 non-zero values, that is 4356 assembled across 3 processors. Let us assume that proc0 owns 3 rows, 4357 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4358 as follows 4359 4360 .vb 4361 1 2 0 | 0 3 0 | 0 4 4362 Proc0 0 5 6 | 7 0 0 | 8 0 4363 9 0 10 | 11 0 0 | 12 0 4364 ------------------------------------- 4365 13 0 14 | 15 16 17 | 0 0 4366 Proc1 0 18 0 | 19 20 21 | 0 0 4367 0 0 0 | 22 23 0 | 24 0 4368 ------------------------------------- 4369 Proc2 25 26 27 | 0 0 28 | 29 0 4370 30 0 0 | 31 32 33 | 0 34 4371 .ve 4372 4373 This can be represented as a collection of submatrices as 4374 4375 .vb 4376 A B C 4377 D E F 4378 G H I 4379 .ve 4380 4381 Where the submatrices A,B,C are owned by proc0, D,E,F are 4382 owned by proc1, G,H,I are owned by proc2. 4383 4384 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4385 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4386 The 'M','N' parameters are 8,8, and have the same values on all procs. 4387 4388 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4389 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4390 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4391 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4392 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ 4393 matrix and [DF] as another SeqAIJ matrix. 4394 4395 When d_nz, o_nz parameters are specified, d_nz storage elements are 4396 allocated for every row of the local diagonal submatrix, and o_nz 4397 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4398 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 4399 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4400 In this case, the values of d_nz,o_nz are 4401 .vb 4402 proc0 : d_nz = 2, o_nz = 2 4403 proc1 : d_nz = 3, o_nz = 2 4404 proc2 : d_nz = 1, o_nz = 4 4405 .ve 4406 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4407 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4408 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4409 34 values. 4410 4411 When d_nnz, o_nnz parameters are specified, the storage is specified 4412 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4413 In the above case the values for d_nnz,o_nnz are 4414 .vb 4415 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4416 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4417 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4418 .ve 4419 Here the space allocated is the sum of all the above values, i.e., 34, and 4420 hence the preallocation is exact.
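As a minimal sketch (illustrative only; the variable names are placeholders), the corresponding call on proc0 of the example above, using the per-row counts just listed, could be
.vb
     Mat      A;
     PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};  /* per-row counts for proc0 taken from the table above */
     ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);  /* d_nz,o_nz are ignored since the nnz arrays are given */
.ve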
4421 4422 Level: intermediate 4423 4424 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4425 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4426 @*/ 4427 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4428 { 4429 PetscErrorCode ierr; 4430 PetscMPIInt size; 4431 4432 PetscFunctionBegin; 4433 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4434 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4435 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4436 if (size > 1) { 4437 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4438 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4439 } else { 4440 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4441 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4442 } 4443 PetscFunctionReturn(0); 4444 } 4445 4446 /*@C 4447 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4448 4449 Not collective 4450 4451 Input Parameter: 4452 . A - The MPIAIJ matrix 4453 4454 Output Parameters: 4455 + Ad - The local diagonal block as a SeqAIJ matrix 4456 . Ao - The local off-diagonal block as a SeqAIJ matrix 4457 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4458 4459 Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4460 in Ad are in [0, Nc), where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is 4461 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4462 local column numbers to global column numbers in the original matrix.
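 A typical call is sketched below (illustrative names); any output argument that is not needed may be passed as NULL:
.vb
     Mat            Ad,Ao;
     const PetscInt *colmap;
     MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
.ve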
4463 4464 Level: intermediate 4465 4466 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ 4467 @*/ 4468 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4469 { 4470 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4471 PetscBool flg; 4472 PetscErrorCode ierr; 4473 4474 PetscFunctionBegin; 4475 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4476 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4477 if (Ad) *Ad = a->A; 4478 if (Ao) *Ao = a->B; 4479 if (colmap) *colmap = a->garray; 4480 PetscFunctionReturn(0); 4481 } 4482 4483 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4484 { 4485 PetscErrorCode ierr; 4486 PetscInt m,N,i,rstart,nnz,Ii; 4487 PetscInt *indx; 4488 PetscScalar *values; 4489 4490 PetscFunctionBegin; 4491 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4492 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4493 PetscInt *dnz,*onz,sum,bs,cbs; 4494 4495 if (n == PETSC_DECIDE) { 4496 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4497 } 4498 /* Check sum(n) = N */ 4499 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 4500 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4501 4502 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 4503 rstart -= m; 4504 4505 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4506 for (i=0; i<m; i++) { 4507 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4508 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4509 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4510 } 4511 4512 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4513 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4514 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4515 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4516 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4517 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4518 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4519 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4520 ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 4521 } 4522 4523 /* numeric phase */ 4524 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4525 for (i=0; i<m; i++) { 4526 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4527 Ii = i + rstart; 4528 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4529 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4530 } 4531 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4532 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4533 PetscFunctionReturn(0); 4534 } 4535 4536 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4537 { 4538 PetscErrorCode ierr; 4539 PetscMPIInt rank; 4540 PetscInt m,N,i,rstart,nnz; 4541 size_t len; 4542 const PetscInt *indx; 4543 PetscViewer out; 4544 char *name; 4545 Mat B; 4546 const PetscScalar *values; 4547 4548 PetscFunctionBegin; 4549 ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr); 4550 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 4551 /* Should this be the type of the diagonal block of A? 
*/ 4552 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4553 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4554 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4555 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4556 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4557 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 4558 for (i=0; i<m; i++) { 4559 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4560 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4561 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4562 } 4563 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4564 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4565 4566 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr); 4567 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4568 ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr); 4569 ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr); 4570 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4571 ierr = PetscFree(name);CHKERRQ(ierr); 4572 ierr = MatView(B,out);CHKERRQ(ierr); 4573 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4574 ierr = MatDestroy(&B);CHKERRQ(ierr); 4575 PetscFunctionReturn(0); 4576 } 4577 4578 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4579 { 4580 PetscErrorCode ierr; 4581 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4582 4583 PetscFunctionBegin; 4584 if (!merge) PetscFunctionReturn(0); 4585 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4586 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4587 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4588 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4589 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4590 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4591 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4592 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4593 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4594 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4595 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4596 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4597 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4598 ierr = PetscFree(merge);CHKERRQ(ierr); 4599 PetscFunctionReturn(0); 4600 } 4601 4602 #include <../src/mat/utils/freespace.h> 4603 #include <petscbt.h> 4604 4605 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4606 { 4607 PetscErrorCode ierr; 4608 MPI_Comm comm; 4609 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4610 PetscMPIInt size,rank,taga,*len_s; 4611 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4612 PetscInt proc,m; 4613 PetscInt **buf_ri,**buf_rj; 4614 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4615 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4616 MPI_Request *s_waits,*r_waits; 4617 MPI_Status *status; 4618 MatScalar *aa=a->a; 4619 MatScalar **abuf_r,*ba_i; 4620 Mat_Merge_SeqsToMPI *merge; 4621 PetscContainer container; 4622 4623 PetscFunctionBegin; 4624 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4625 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4626 4627 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4628 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 4629 4630 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4631 if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4632 ierr = 
PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4633 4634 bi = merge->bi; 4635 bj = merge->bj; 4636 buf_ri = merge->buf_ri; 4637 buf_rj = merge->buf_rj; 4638 4639 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4640 owners = merge->rowmap->range; 4641 len_s = merge->len_s; 4642 4643 /* send and recv matrix values */ 4644 /*-----------------------------*/ 4645 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4646 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4647 4648 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4649 for (proc=0,k=0; proc<size; proc++) { 4650 if (!len_s[proc]) continue; 4651 i = owners[proc]; 4652 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRMPI(ierr); 4653 k++; 4654 } 4655 4656 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRMPI(ierr);} 4657 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRMPI(ierr);} 4658 ierr = PetscFree(status);CHKERRQ(ierr); 4659 4660 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4661 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4662 4663 /* insert mat values of mpimat */ 4664 /*----------------------------*/ 4665 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4666 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4667 4668 for (k=0; k<merge->nrecv; k++) { 4669 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4670 nrows = *(buf_ri_k[k]); 4671 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4672 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4673 } 4674 4675 /* set values of ba */ 4676 m = merge->rowmap->n; 4677 for (i=0; i<m; i++) { 4678 arow = owners[rank] + i; 4679 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4680 bnzi = bi[i+1] - bi[i]; 4681 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4682 4683 /* add local non-zero vals of this proc's seqmat into ba */ 4684 anzi = ai[arow+1] - ai[arow]; 4685 aj = a->j + ai[arow]; 4686 aa = a->a + ai[arow]; 4687 nextaj = 0; 4688 for (j=0; nextaj<anzi; j++) { 4689 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4690 ba_i[j] += aa[nextaj++]; 4691 } 4692 } 4693 4694 /* add received vals into ba */ 4695 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4696 /* i-th row */ 4697 if (i == *nextrow[k]) { 4698 anzi = *(nextai[k]+1) - *nextai[k]; 4699 aj = buf_rj[k] + *(nextai[k]); 4700 aa = abuf_r[k] + *(nextai[k]); 4701 nextaj = 0; 4702 for (j=0; nextaj<anzi; j++) { 4703 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4704 ba_i[j] += aa[nextaj++]; 4705 } 4706 } 4707 nextrow[k]++; nextai[k]++; 4708 } 4709 } 4710 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4711 } 4712 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4713 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4714 4715 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4716 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4717 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4718 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4719 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4720 PetscFunctionReturn(0); 4721 } 4722 4723 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4724 { 4725 PetscErrorCode ierr; 4726 Mat B_mpi; 4727 Mat_SeqAIJ 
*a=(Mat_SeqAIJ*)seqmat->data; 4728 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4729 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4730 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4731 PetscInt len,proc,*dnz,*onz,bs,cbs; 4732 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4733 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4734 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4735 MPI_Status *status; 4736 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4737 PetscBT lnkbt; 4738 Mat_Merge_SeqsToMPI *merge; 4739 PetscContainer container; 4740 4741 PetscFunctionBegin; 4742 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4743 4744 /* make sure it is a PETSc comm */ 4745 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4746 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4747 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 4748 4749 ierr = PetscNew(&merge);CHKERRQ(ierr); 4750 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4751 4752 /* determine row ownership */ 4753 /*---------------------------------------------------------*/ 4754 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4755 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4756 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4757 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4758 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4759 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4760 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4761 4762 m = merge->rowmap->n; 4763 owners = merge->rowmap->range; 4764 4765 /* determine the number of messages to send, their lengths */ 4766 /*---------------------------------------------------------*/ 4767 len_s = merge->len_s; 4768 4769 len = 0; /* length of buf_si[] */ 4770 merge->nsend = 0; 4771 for (proc=0; proc<size; proc++) { 4772 len_si[proc] = 0; 4773 if (proc == rank) { 4774 len_s[proc] = 0; 4775 } else { 4776 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4777 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4778 } 4779 if (len_s[proc]) { 4780 merge->nsend++; 4781 nrows = 0; 4782 for (i=owners[proc]; i<owners[proc+1]; i++) { 4783 if (ai[i+1] > ai[i]) nrows++; 4784 } 4785 len_si[proc] = 2*(nrows+1); 4786 len += len_si[proc]; 4787 } 4788 } 4789 4790 /* determine the number and length of messages to receive for ij-structure */ 4791 /*-------------------------------------------------------------------------*/ 4792 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4793 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4794 4795 /* post the Irecv of j-structure */ 4796 /*-------------------------------*/ 4797 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4798 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4799 4800 /* post the Isend of j-structure */ 4801 /*--------------------------------*/ 4802 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4803 4804 for (proc=0, k=0; proc<size; proc++) { 4805 if (!len_s[proc]) continue; 4806 i = owners[proc]; 4807 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRMPI(ierr); 4808 k++; 4809 } 4810 4811 /* receives and sends of j-structure are complete */ 4812 
/*------------------------------------------------*/ 4813 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRMPI(ierr);} 4814 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRMPI(ierr);} 4815 4816 /* send and recv i-structure */ 4817 /*---------------------------*/ 4818 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4819 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4820 4821 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4822 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4823 for (proc=0,k=0; proc<size; proc++) { 4824 if (!len_s[proc]) continue; 4825 /* form outgoing message for i-structure: 4826 buf_si[0]: nrows to be sent 4827 [1:nrows]: row index (global) 4828 [nrows+1:2*nrows+1]: i-structure index 4829 */ 4830 /*-------------------------------------------*/ 4831 nrows = len_si[proc]/2 - 1; 4832 buf_si_i = buf_si + nrows+1; 4833 buf_si[0] = nrows; 4834 buf_si_i[0] = 0; 4835 nrows = 0; 4836 for (i=owners[proc]; i<owners[proc+1]; i++) { 4837 anzi = ai[i+1] - ai[i]; 4838 if (anzi) { 4839 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4840 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4841 nrows++; 4842 } 4843 } 4844 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRMPI(ierr); 4845 k++; 4846 buf_si += len_si[proc]; 4847 } 4848 4849 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRMPI(ierr);} 4850 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRMPI(ierr);} 4851 4852 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4853 for (i=0; i<merge->nrecv; i++) { 4854 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4855 } 4856 4857 ierr = PetscFree(len_si);CHKERRQ(ierr); 4858 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4859 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4860 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4861 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4862 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4863 ierr = PetscFree(status);CHKERRQ(ierr); 4864 4865 /* compute a local seq matrix in each processor */ 4866 /*----------------------------------------------*/ 4867 /* allocate bi array and free space for accumulating nonzero column info */ 4868 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4869 bi[0] = 0; 4870 4871 /* create and initialize a linked list */ 4872 nlnk = N+1; 4873 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4874 4875 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4876 len = ai[owners[rank+1]] - ai[owners[rank]]; 4877 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4878 4879 current_space = free_space; 4880 4881 /* determine symbolic info for each local row */ 4882 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4883 4884 for (k=0; k<merge->nrecv; k++) { 4885 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4886 nrows = *buf_ri_k[k]; 4887 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4888 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4889 } 4890 4891 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4892 len = 0; 4893 for (i=0; i<m; i++) { 4894 bnzi = 0; 4895 /* add local non-zero cols of this proc's 
seqmat into lnk */ 4896 arow = owners[rank] + i; 4897 anzi = ai[arow+1] - ai[arow]; 4898 aj = a->j + ai[arow]; 4899 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4900 bnzi += nlnk; 4901 /* add received col data into lnk */ 4902 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4903 if (i == *nextrow[k]) { /* i-th row */ 4904 anzi = *(nextai[k]+1) - *nextai[k]; 4905 aj = buf_rj[k] + *nextai[k]; 4906 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4907 bnzi += nlnk; 4908 nextrow[k]++; nextai[k]++; 4909 } 4910 } 4911 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4912 4913 /* if free space is not available, make more free space */ 4914 if (current_space->local_remaining<bnzi) { 4915 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr); 4916 nspacedouble++; 4917 } 4918 /* copy data into free space, then initialize lnk */ 4919 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4920 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4921 4922 current_space->array += bnzi; 4923 current_space->local_used += bnzi; 4924 current_space->local_remaining -= bnzi; 4925 4926 bi[i+1] = bi[i] + bnzi; 4927 } 4928 4929 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4930 4931 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4932 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4933 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4934 4935 /* create symbolic parallel matrix B_mpi */ 4936 /*---------------------------------------*/ 4937 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4938 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4939 if (n==PETSC_DECIDE) { 4940 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4941 } else { 4942 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4943 } 4944 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4945 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4946 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4947 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4948 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4949 4950 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4951 B_mpi->assembled = PETSC_FALSE; 4952 merge->bi = bi; 4953 merge->bj = bj; 4954 merge->buf_ri = buf_ri; 4955 merge->buf_rj = buf_rj; 4956 merge->coi = NULL; 4957 merge->coj = NULL; 4958 merge->owners_co = NULL; 4959 4960 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4961 4962 /* attach the supporting struct to B_mpi for reuse */ 4963 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4964 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4965 ierr = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr); 4966 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4967 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4968 *mpimat = B_mpi; 4969 4970 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4971 PetscFunctionReturn(0); 4972 } 4973 4974 /*@C 4975 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4976 matrices from each processor 4977 4978 Collective 4979 4980 Input Parameters: 4981 + comm - the communicator the parallel matrix will live on 4982 . seqmat - the input sequential matrix on each process 4983 .
m - number of local rows (or PETSC_DECIDE) 4984 . n - number of local columns (or PETSC_DECIDE) 4985 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4986 4987 Output Parameter: 4988 . mpimat - the parallel matrix generated 4989 4990 Level: advanced 4991 4992 Notes: 4993 The dimensions of the sequential matrix in each processor MUST be the same. 4994 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4995 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4996 @*/ 4997 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4998 { 4999 PetscErrorCode ierr; 5000 PetscMPIInt size; 5001 5002 PetscFunctionBegin; 5003 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 5004 if (size == 1) { 5005 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5006 if (scall == MAT_INITIAL_MATRIX) { 5007 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 5008 } else { 5009 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5010 } 5011 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5012 PetscFunctionReturn(0); 5013 } 5014 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5015 if (scall == MAT_INITIAL_MATRIX) { 5016 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5017 } 5018 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5019 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5020 PetscFunctionReturn(0); 5021 } 5022 5023 /*@ 5024 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5025 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5026 with MatGetSize() 5027 5028 Not Collective 5029 5030 Input Parameters: 5031 + A - the matrix 5032 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5033 5034 Output Parameter: 5035 . A_loc - the local sequential matrix generated 5036 5037 Level: developer 5038 5039 Notes: 5040 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 5041 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 5042 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 5043 modify the values of the returned A_loc. 
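 For example, one can build the local matrix once and then refresh only its values after the entries of A change (a sketch):
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... modify values of A and reassemble ... */
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
.ve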
5044 5045 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge() 5046 @*/ 5047 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5048 { 5049 PetscErrorCode ierr; 5050 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5051 Mat_SeqAIJ *mat,*a,*b; 5052 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5053 const PetscScalar *aa,*ba,*aav,*bav; 5054 PetscScalar *ca,*cam; 5055 PetscMPIInt size; 5056 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5057 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5058 PetscBool match; 5059 5060 PetscFunctionBegin; 5061 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5062 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5063 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 5064 if (size == 1) { 5065 if (scall == MAT_INITIAL_MATRIX) { 5066 ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr); 5067 *A_loc = mpimat->A; 5068 } else if (scall == MAT_REUSE_MATRIX) { 5069 ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5070 } 5071 PetscFunctionReturn(0); 5072 } 5073 5074 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5075 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5076 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5077 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5078 ierr = MatSeqAIJGetArrayRead(mpimat->A,&aav);CHKERRQ(ierr); 5079 ierr = MatSeqAIJGetArrayRead(mpimat->B,&bav);CHKERRQ(ierr); 5080 aa = aav; 5081 ba = bav; 5082 if (scall == MAT_INITIAL_MATRIX) { 5083 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5084 ci[0] = 0; 5085 for (i=0; i<am; i++) { 5086 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5087 } 5088 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5089 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5090 k = 0; 5091 for (i=0; i<am; i++) { 5092 ncols_o = bi[i+1] - bi[i]; 5093 ncols_d = ai[i+1] - ai[i]; 5094 /* off-diagonal portion of A */ 5095 for (jo=0; jo<ncols_o; jo++) { 5096 col = cmap[*bj]; 5097 if (col >= cstart) break; 5098 cj[k] = col; bj++; 5099 ca[k++] = *ba++; 5100 } 5101 /* diagonal portion of A */ 5102 for (j=0; j<ncols_d; j++) { 5103 cj[k] = cstart + *aj++; 5104 ca[k++] = *aa++; 5105 } 5106 /* off-diagonal portion of A */ 5107 for (j=jo; j<ncols_o; j++) { 5108 cj[k] = cmap[*bj++]; 5109 ca[k++] = *ba++; 5110 } 5111 } 5112 /* put together the new matrix */ 5113 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5114 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5115 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5116 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5117 mat->free_a = PETSC_TRUE; 5118 mat->free_ij = PETSC_TRUE; 5119 mat->nonew = 0; 5120 } else if (scall == MAT_REUSE_MATRIX) { 5121 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5122 #if defined(PETSC_HAVE_DEVICE) 5123 (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU; 5124 #endif 5125 ci = mat->i; cj = mat->j; cam = mat->a; 5126 for (i=0; i<am; i++) { 5127 /* off-diagonal portion of A */ 5128 ncols_o = bi[i+1] - bi[i]; 5129 for (jo=0; jo<ncols_o; jo++) { 5130 col = cmap[*bj]; 5131 if (col >= cstart) break; 5132 *cam++ = *ba++; bj++; 5133 } 5134 /* diagonal portion of A */ 5135 ncols_d = ai[i+1] - ai[i]; 5136 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5137 /* off-diagonal portion of A */ 5138 for (j=jo; j<ncols_o; j++) { 5139 *cam++ = *ba++; bj++; 5140 } 5141 } 5142 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5143 ierr = MatSeqAIJRestoreArrayRead(mpimat->A,&aav);CHKERRQ(ierr); 5144 ierr = MatSeqAIJRestoreArrayRead(mpimat->B,&bav);CHKERRQ(ierr); 5145 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5146 PetscFunctionReturn(0); 5147 } 5148 5149 /*@ 5150 MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5151 mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and off-diagonal parts 5152 5153 Not Collective 5154 5155 Input Parameters: 5156 + A - the matrix 5157 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5158 5159 Output Parameters: 5160 + glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5161 - A_loc - the local sequential matrix generated 5162 5163 Level: developer 5164 5165 Notes: 5166 This is different from MatMPIAIJGetLocalMat() since the first columns in the returned matrix are those associated with the diagonal part, followed by those associated with the off-diagonal part (in its local ordering) 5167 5168 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed() 5169 5170 @*/ 5171 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc) 5172 { 5173 PetscErrorCode ierr; 5174 Mat Ao,Ad; 5175 const PetscInt *cmap; 5176 PetscMPIInt size; 5177 PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*); 5178 5179 PetscFunctionBegin; 5180 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap);CHKERRQ(ierr); 5181 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 5182 if (size == 1) { 5183 if (scall == MAT_INITIAL_MATRIX) { 5184 ierr = PetscObjectReference((PetscObject)Ad);CHKERRQ(ierr); 5185 *A_loc = Ad; 5186 } else if (scall == MAT_REUSE_MATRIX) { 5187 ierr = MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5188 } 5189 if (glob) { ierr = ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob);CHKERRQ(ierr); } 5190 PetscFunctionReturn(0); 5191 } 5192 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f);CHKERRQ(ierr); 5193 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5194 if (f) { 5195 ierr = (*f)(A,scall,glob,A_loc);CHKERRQ(ierr); 5196 } else { 5197 Mat_SeqAIJ *a = (Mat_SeqAIJ*)Ad->data; 5198 Mat_SeqAIJ *b = (Mat_SeqAIJ*)Ao->data; 5199 Mat_SeqAIJ *c; 5200 PetscInt *ai = a->i, *aj = a->j; 5201 PetscInt *bi = b->i, *bj = b->j; 5202 PetscInt *ci,*cj; 5203 const PetscScalar *aa,*ba; 5204 PetscScalar *ca; 5205 PetscInt i,j,am,dn,on; 5206 5207 ierr =
MatGetLocalSize(Ad,&am,&dn);CHKERRQ(ierr); 5208 ierr = MatGetLocalSize(Ao,NULL,&on);CHKERRQ(ierr); 5209 ierr = MatSeqAIJGetArrayRead(Ad,&aa);CHKERRQ(ierr); 5210 ierr = MatSeqAIJGetArrayRead(Ao,&ba);CHKERRQ(ierr); 5211 if (scall == MAT_INITIAL_MATRIX) { 5212 PetscInt k; 5213 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5214 ierr = PetscMalloc1(ai[am]+bi[am],&cj);CHKERRQ(ierr); 5215 ierr = PetscMalloc1(ai[am]+bi[am],&ca);CHKERRQ(ierr); 5216 ci[0] = 0; 5217 for (i=0,k=0; i<am; i++) { 5218 const PetscInt ncols_o = bi[i+1] - bi[i]; 5219 const PetscInt ncols_d = ai[i+1] - ai[i]; 5220 ci[i+1] = ci[i] + ncols_o + ncols_d; 5221 /* diagonal portion of A */ 5222 for (j=0; j<ncols_d; j++,k++) { 5223 cj[k] = *aj++; 5224 ca[k] = *aa++; 5225 } 5226 /* off-diagonal portion of A */ 5227 for (j=0; j<ncols_o; j++,k++) { 5228 cj[k] = dn + *bj++; 5229 ca[k] = *ba++; 5230 } 5231 } 5232 /* put together the new matrix */ 5233 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc);CHKERRQ(ierr); 5234 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5235 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5236 c = (Mat_SeqAIJ*)(*A_loc)->data; 5237 c->free_a = PETSC_TRUE; 5238 c->free_ij = PETSC_TRUE; 5239 c->nonew = 0; 5240 ierr = MatSetType(*A_loc,((PetscObject)Ad)->type_name);CHKERRQ(ierr); 5241 } else if (scall == MAT_REUSE_MATRIX) { 5242 #if defined(PETSC_HAVE_DEVICE) 5243 (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU; 5244 #endif 5245 c = (Mat_SeqAIJ*)(*A_loc)->data; 5246 ca = c->a; 5247 for (i=0; i<am; i++) { 5248 const PetscInt ncols_d = ai[i+1] - ai[i]; 5249 const PetscInt ncols_o = bi[i+1] - bi[i]; 5250 /* diagonal portion of A */ 5251 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 5252 /* off-diagonal portion of A */ 5253 for (j=0; j<ncols_o; j++) *ca++ = *ba++; 5254 } 5255 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5256 ierr = MatSeqAIJRestoreArrayRead(Ad,&aa);CHKERRQ(ierr); 5257 ierr = MatSeqAIJRestoreArrayRead(Ao,&ba);CHKERRQ(ierr); 5258 if (glob) { 5259 PetscInt cst, *gidx; 5260 5261 ierr = MatGetOwnershipRangeColumn(A,&cst,NULL);CHKERRQ(ierr); 5262 ierr = PetscMalloc1(dn+on,&gidx);CHKERRQ(ierr); 5263 for (i=0; i<dn; i++) gidx[i] = cst + i; 5264 for (i=0; i<on; i++) gidx[i+dn] = cmap[i]; 5265 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob);CHKERRQ(ierr); 5266 } 5267 } 5268 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5269 PetscFunctionReturn(0); 5270 } 5271 5272 /*@C 5273 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5274 5275 Not Collective 5276 5277 Input Parameters: 5278 + A - the matrix 5279 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5280 - row, col - index sets of rows and columns to extract (or NULL) 5281 5282 Output Parameter: 5283 .
A_loc - the local sequential matrix generated 5284 5285 Level: developer 5286 5287 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5288 5289 @*/ 5290 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5291 { 5292 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5293 PetscErrorCode ierr; 5294 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5295 IS isrowa,iscola; 5296 Mat *aloc; 5297 PetscBool match; 5298 5299 PetscFunctionBegin; 5300 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5301 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5302 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5303 if (!row) { 5304 start = A->rmap->rstart; end = A->rmap->rend; 5305 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5306 } else { 5307 isrowa = *row; 5308 } 5309 if (!col) { 5310 start = A->cmap->rstart; 5311 cmap = a->garray; 5312 nzA = a->A->cmap->n; 5313 nzB = a->B->cmap->n; 5314 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5315 ncols = 0; 5316 for (i=0; i<nzB; i++) { 5317 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5318 else break; 5319 } 5320 imark = i; 5321 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5322 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5323 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5324 } else { 5325 iscola = *col; 5326 } 5327 if (scall != MAT_INITIAL_MATRIX) { 5328 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5329 aloc[0] = *A_loc; 5330 } 5331 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5332 if (!col) { /* attach global id of condensed columns */ 5333 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5334 } 5335 *A_loc = aloc[0]; 5336 ierr = PetscFree(aloc);CHKERRQ(ierr); 5337 if (!row) { 5338 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5339 } 5340 if (!col) { 5341 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5342 } 5343 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5344 PetscFunctionReturn(0); 5345 } 5346 5347 /* 5348 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5349 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5350 * on a global size. 
* */ 5352 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5353 { 5354 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5355 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5356 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5357 PetscMPIInt owner; 5358 PetscSFNode *iremote,*oiremote; 5359 const PetscInt *lrowindices; 5360 PetscErrorCode ierr; 5361 PetscSF sf,osf; 5362 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5363 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5364 MPI_Comm comm; 5365 ISLocalToGlobalMapping mapping; 5366 5367 PetscFunctionBegin; 5368 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr); 5369 /* plocalsize is the number of roots 5370 * nrows is the number of leaves 5371 * */ 5372 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr); 5373 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr); 5374 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr); 5375 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr); 5376 for (i=0;i<nrows;i++) { 5377 /* Find a remote index and an owner for a row 5378 * The row could be local or remote 5379 * */ 5380 owner = 0; 5381 lidx = 0; 5382 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr); 5383 iremote[i].index = lidx; 5384 iremote[i].rank = owner; 5385 } 5386 /* Create SF to communicate how many nonzero columns for each row */ 5387 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5388 /* SF will figure out the number of nonzero columns for each row, and their 5389 * offsets 5390 * */ 5391 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5392 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5393 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5394 5395 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); 5396 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); 5397 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); 5398 roffsets[0] = 0; 5399 roffsets[1] = 0; 5400 for (i=0;i<plocalsize;i++) { 5401 /* diag */ 5402 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5403 /* off diag */ 5404 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5405 /* compute offsets so that we know the relative location of each row */ 5406 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5407 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5408 } 5409 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr); 5410 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr); 5411 /* 'r' means root, and 'l' means leaf */ 5412 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr); 5413 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr); 5414 ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr); 5415 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr); 5416 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5417 ierr = PetscFree(roffsets);CHKERRQ(ierr); 5418 ierr = PetscFree(nrcols);CHKERRQ(ierr); 5419 dntotalcols = 0; 5420 ontotalcols = 0; 5421 ncol = 0; 5422 for (i=0;i<nrows;i++) { 5423 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5424 ncol = PetscMax(pnnz[i],ncol); 5425 /* diag */ 5426 dntotalcols += nlcols[i*2+0]; 5427 /* off diag */ 5428 ontotalcols += nlcols[i*2+1]; 5429 } 5430 /* We do not need to figure out the right number of columns 5431 * since all the calculations will be done by going through the raw data 5432 * */ 5433 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr); 5434 ierr =
MatSetUp(*P_oth);CHKERRQ(ierr); 5435 ierr = PetscFree(pnnz);CHKERRQ(ierr); 5436 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5437 /* diag */ 5438 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr); 5439 /* off diag */ 5440 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr); 5441 /* diag */ 5442 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr); 5443 /* off diag */ 5444 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr); 5445 dntotalcols = 0; 5446 ontotalcols = 0; 5447 ntotalcols = 0; 5448 for (i=0;i<nrows;i++) { 5449 owner = 0; 5450 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr); 5451 /* Set iremote for diag matrix */ 5452 for (j=0;j<nlcols[i*2+0];j++) { 5453 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5454 iremote[dntotalcols].rank = owner; 5455 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5456 ilocal[dntotalcols++] = ntotalcols++; 5457 } 5458 /* off diag */ 5459 for (j=0;j<nlcols[i*2+1];j++) { 5460 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5461 oiremote[ontotalcols].rank = owner; 5462 oilocal[ontotalcols++] = ntotalcols++; 5463 } 5464 } 5465 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr); 5466 ierr = PetscFree(loffsets);CHKERRQ(ierr); 5467 ierr = PetscFree(nlcols);CHKERRQ(ierr); 5468 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5469 /* P serves as roots and P_oth is leaves 5470 * Diag matrix 5471 * */ 5472 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5473 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5474 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5475 5476 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr); 5477 /* Off diag */ 5478 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5479 ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr); 5480 ierr = PetscSFSetUp(osf);CHKERRQ(ierr); 5481 /* We operate on the matrix internal data for saving memory */ 5482 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5483 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5484 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr); 5485 /* Convert to global indices for diag matrix */ 5486 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5487 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5488 /* We want P_oth store global indices */ 5489 ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr); 5490 /* Use memory scalable approach */ 5491 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr); 5492 ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); 5493 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5494 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5495 /* Convert back to local indices */ 5496 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5497 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5498 nout = 0; 5499 ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr); 5500 if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout); 5501 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 
5502 /* Exchange values */ 5503 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5504 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5505 /* Stop PETSc from shrinking memory */ 5506 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5507 ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5508 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5509 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5510 ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5511 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5512 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5513 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5514 PetscFunctionReturn(0); 5515 } 5516 5517 /* 5518 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5519 * This supports MPIAIJ and MAIJ 5520 * */ 5521 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5522 { 5523 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5524 Mat_SeqAIJ *p_oth; 5525 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 5526 IS rows,map; 5527 PetscHMapI hamp; 5528 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5529 MPI_Comm comm; 5530 PetscSF sf,osf; 5531 PetscBool has; 5532 PetscErrorCode ierr; 5533 5534 PetscFunctionBegin; 5535 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5536 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5537 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5538 * and then create a submatrix (that often is an overlapping matrix) 5539 * */ 5540 if (reuse == MAT_INITIAL_MATRIX) { 5541 /* Use a hash table to figure out unique keys */ 5542 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5543 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5544 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5545 count = 0; 5546 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5547 for (i=0;i<a->B->cmap->n;i++) { 5548 key = a->garray[i]/dof; 5549 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5550 if (!has) { 5551 mapping[i] = count; 5552 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5553 } else { 5554 /* Current 'i' has the same value the previous step */ 5555 mapping[i] = count-1; 5556 } 5557 } 5558 ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5559 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5560 if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count); 5561 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5562 off = 0; 5563 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5564 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5565 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5566 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5567 /* In case, the matrix was already created but users want to recreate the matrix */ 5568 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5569 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5570 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5571 ierr = ISDestroy(&map);CHKERRQ(ierr); 5572 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5573 } else if 
(reuse == MAT_REUSE_MATRIX) { 5574 /* If matrix was already created, we simply update values using SF objects 5575 * that were attached to the matrix earlier. 5576 * */ 5577 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5578 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5579 if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet"); 5580 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5581 /* Update values in place */ 5582 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5583 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5584 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5585 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5586 } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type"); 5587 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5588 PetscFunctionReturn(0); 5589 } 5590 5591 /*@C 5592 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A 5593 5594 Collective on Mat 5595 5596 Input Parameters: 5597 + A - the first matrix in mpiaij format 5598 . B - the second matrix in mpiaij format 5599 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5600 5601 Input/Output Parameters: 5602 + rowb - index sets of rows of B to extract (or NULL), modified on output 5603 - colb - index sets of columns of B to extract (or NULL), modified on output 5604 5605 Output Parameter: 5606 . B_seq - the sequential matrix generated 5607 5608 Level: developer 5609 5610 @*/ 5611 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5612 { 5613 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5614 PetscErrorCode ierr; 5615 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5616 IS isrowb,iscolb; 5617 Mat *bseq=NULL; 5618 5619 PetscFunctionBegin; 5620 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5621 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5622 } 5623 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5624 5625 if (scall == MAT_INITIAL_MATRIX) { 5626 start = A->cmap->rstart; 5627 cmap = a->garray; 5628 nzA = a->A->cmap->n; 5629 nzB = a->B->cmap->n; 5630 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5631 ncols = 0; 5632 for (i=0; i<nzB; i++) { /* row < local row index */ 5633 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5634 else break; 5635 } 5636 imark = i; 5637 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5638 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5639 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5640 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5641 } else { 5642 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5643 isrowb = *rowb; iscolb = *colb; 5644 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5645 bseq[0] = *B_seq; 5646 } 5647 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5648 *B_seq = bseq[0]; 5649 ierr = PetscFree(bseq);CHKERRQ(ierr); 5650 if (!rowb) { 5651 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5652 } else { 5653 *rowb = isrowb; 5654 } 5655 if (!colb)
{ 5656 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5657 } else { 5658 *colb = iscolb; 5659 } 5660 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5661 PetscFunctionReturn(0); 5662 } 5663 5664 /* 5665 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5666 of the OFF-DIAGONAL portion of local A 5667 5668 Collective on Mat 5669 5670 Input Parameters: 5671 + A,B - the matrices in mpiaij format 5672 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5673 5674 Output Parameter: 5675 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5676 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5677 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5678 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5679 5680 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5681 for this matrix. This is not desirable.. 5682 5683 Level: developer 5684 5685 */ 5686 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5687 { 5688 PetscErrorCode ierr; 5689 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5690 Mat_SeqAIJ *b_oth; 5691 VecScatter ctx; 5692 MPI_Comm comm; 5693 const PetscMPIInt *rprocs,*sprocs; 5694 const PetscInt *srow,*rstarts,*sstarts; 5695 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5696 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5697 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5698 MPI_Request *reqs = NULL,*rwaits = NULL,*swaits = NULL; 5699 PetscMPIInt size,tag,rank,nreqs; 5700 5701 PetscFunctionBegin; 5702 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5703 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 5704 5705 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5706 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5707 } 5708 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5709 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 5710 5711 if (size == 1) { 5712 startsj_s = NULL; 5713 bufa_ptr = NULL; 5714 *B_oth = NULL; 5715 PetscFunctionReturn(0); 5716 } 5717 5718 ctx = a->Mvctx; 5719 tag = ((PetscObject)ctx)->tag; 5720 5721 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5722 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5723 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5724 ierr = PetscMPIIntCast(nsends+nrecvs,&nreqs);CHKERRQ(ierr); 5725 ierr = PetscMalloc1(nreqs,&reqs);CHKERRQ(ierr); 5726 rwaits = reqs; 5727 swaits = reqs + nrecvs; 5728 5729 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5730 if (scall == MAT_INITIAL_MATRIX) { 5731 /* i-array */ 5732 /*---------*/ 5733 /* post receives */ 5734 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5735 for (i=0; i<nrecvs; i++) { 5736 rowlen = rvalues + rstarts[i]*rbs; 5737 nrows = 
(rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5738 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5739 } 5740 5741 /* pack the outgoing message */ 5742 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5743 5744 sstartsj[0] = 0; 5745 rstartsj[0] = 0; 5746 len = 0; /* total length of j or a array to be sent */ 5747 if (nsends) { 5748 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5749 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5750 } 5751 for (i=0; i<nsends; i++) { 5752 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5753 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5754 for (j=0; j<nrows; j++) { 5755 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5756 for (l=0; l<sbs; l++) { 5757 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5758 5759 rowlen[j*sbs+l] = ncols; 5760 5761 len += ncols; 5762 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5763 } 5764 k++; 5765 } 5766 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5767 5768 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5769 } 5770 /* recvs and sends of i-array are completed */ 5771 if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5772 ierr = PetscFree(svalues);CHKERRQ(ierr); 5773 5774 /* allocate buffers for sending j and a arrays */ 5775 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5776 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5777 5778 /* create i-array of B_oth */ 5779 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5780 5781 b_othi[0] = 0; 5782 len = 0; /* total length of j or a array to be received */ 5783 k = 0; 5784 for (i=0; i<nrecvs; i++) { 5785 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5786 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5787 for (j=0; j<nrows; j++) { 5788 b_othi[k+1] = b_othi[k] + rowlen[j]; 5789 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5790 k++; 5791 } 5792 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5793 } 5794 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5795 5796 /* allocate space for j and a arrrays of B_oth */ 5797 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5798 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5799 5800 /* j-array */ 5801 /*---------*/ 5802 /* post receives of j-array */ 5803 for (i=0; i<nrecvs; i++) { 5804 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5805 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5806 } 5807 5808 /* pack the outgoing message j-array */ 5809 if (nsends) k = sstarts[0]; 5810 for (i=0; i<nsends; i++) { 5811 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5812 bufJ = bufj+sstartsj[i]; 5813 for (j=0; j<nrows; j++) { 5814 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5815 for (ll=0; ll<sbs; ll++) { 5816 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5817 for (l=0; l<ncols; l++) { 5818 *bufJ++ = cols[l]; 5819 } 5820 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5821 } 5822 } 5823 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5824 } 5825 5826 /* recvs and sends of j-array are completed */ 5827 if (nreqs) {ierr = 
MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5828 } else if (scall == MAT_REUSE_MATRIX) { 5829 sstartsj = *startsj_s; 5830 rstartsj = *startsj_r; 5831 bufa = *bufa_ptr; 5832 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5833 b_otha = b_oth->a; 5834 #if defined(PETSC_HAVE_DEVICE) 5835 (*B_oth)->offloadmask = PETSC_OFFLOAD_CPU; 5836 #endif 5837 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5838 5839 /* a-array */ 5840 /*---------*/ 5841 /* post receives of a-array */ 5842 for (i=0; i<nrecvs; i++) { 5843 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5844 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5845 } 5846 5847 /* pack the outgoing message a-array */ 5848 if (nsends) k = sstarts[0]; 5849 for (i=0; i<nsends; i++) { 5850 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5851 bufA = bufa+sstartsj[i]; 5852 for (j=0; j<nrows; j++) { 5853 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5854 for (ll=0; ll<sbs; ll++) { 5855 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5856 for (l=0; l<ncols; l++) { 5857 *bufA++ = vals[l]; 5858 } 5859 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5860 } 5861 } 5862 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5863 } 5864 /* recvs and sends of a-array are completed */ 5865 if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5866 ierr = PetscFree(reqs);CHKERRQ(ierr); 5867 5868 if (scall == MAT_INITIAL_MATRIX) { 5869 /* put together the new matrix */ 5870 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5871 5872 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5873 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5874 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5875 b_oth->free_a = PETSC_TRUE; 5876 b_oth->free_ij = PETSC_TRUE; 5877 b_oth->nonew = 0; 5878 5879 ierr = PetscFree(bufj);CHKERRQ(ierr); 5880 if (!startsj_s || !bufa_ptr) { 5881 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5882 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5883 } else { 5884 *startsj_s = sstartsj; 5885 *startsj_r = rstartsj; 5886 *bufa_ptr = bufa; 5887 } 5888 } 5889 5890 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5891 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5892 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5893 PetscFunctionReturn(0); 5894 } 5895 5896 /*@C 5897 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5898 5899 Not Collective 5900 5901 Input Parameter: 5902 . A - The matrix in mpiaij format 5903 5904 Output Parameters: 5905 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5906 . 
colmap - A map from global column index to local index into lvec 5907 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5908 5909 Level: developer 5910 5911 @*/ 5912 #if defined(PETSC_USE_CTABLE) 5913 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5914 #else 5915 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5916 #endif 5917 { 5918 Mat_MPIAIJ *a; 5919 5920 PetscFunctionBegin; 5921 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5922 PetscValidPointer(lvec, 2); 5923 PetscValidPointer(colmap, 3); 5924 PetscValidPointer(multScatter, 4); 5925 a = (Mat_MPIAIJ*) A->data; 5926 if (lvec) *lvec = a->lvec; 5927 if (colmap) *colmap = a->colmap; 5928 if (multScatter) *multScatter = a->Mvctx; 5929 PetscFunctionReturn(0); 5930 } 5931 5932 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5933 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5934 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5935 #if defined(PETSC_HAVE_MKL_SPARSE) 5936 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5937 #endif 5938 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5939 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5940 #if defined(PETSC_HAVE_ELEMENTAL) 5941 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5942 #endif 5943 #if defined(PETSC_HAVE_SCALAPACK) 5944 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 5945 #endif 5946 #if defined(PETSC_HAVE_HYPRE) 5947 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5948 #endif 5949 #if defined(PETSC_HAVE_CUDA) 5950 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 5951 #endif 5952 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 5953 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*); 5954 #endif 5955 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5956 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5957 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 5958 5959 /* 5960 Computes (B'*A')' since computing B*A directly is untenable 5961 5962 n p p 5963 [ ] [ ] [ ] 5964 m [ A ] * n [ B ] = m [ C ] 5965 [ ] [ ] [ ] 5966 5967 */ 5968 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5969 { 5970 PetscErrorCode ierr; 5971 Mat At,Bt,Ct; 5972 5973 PetscFunctionBegin; 5974 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5975 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5976 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr); 5977 ierr = MatDestroy(&At);CHKERRQ(ierr); 5978 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5979 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5980 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5981 PetscFunctionReturn(0); 5982 } 5983 5984 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 5985 { 5986 PetscErrorCode ierr; 5987 PetscBool cisdense; 5988 5989 PetscFunctionBegin; 5990 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5991 ierr = 
MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 5992 ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 5993 ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr); 5994 if (!cisdense) { 5995 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 5996 } 5997 ierr = MatSetUp(C);CHKERRQ(ierr); 5998 5999 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6000 PetscFunctionReturn(0); 6001 } 6002 6003 /* ----------------------------------------------------------------*/ 6004 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6005 { 6006 Mat_Product *product = C->product; 6007 Mat A = product->A,B=product->B; 6008 6009 PetscFunctionBegin; 6010 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 6011 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 6012 6013 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6014 C->ops->productsymbolic = MatProductSymbolic_AB; 6015 PetscFunctionReturn(0); 6016 } 6017 6018 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6019 { 6020 PetscErrorCode ierr; 6021 Mat_Product *product = C->product; 6022 6023 PetscFunctionBegin; 6024 if (product->type == MATPRODUCT_AB) { 6025 ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr); 6026 } 6027 PetscFunctionReturn(0); 6028 } 6029 /* ----------------------------------------------------------------*/ 6030 6031 /*MC 6032 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6033 6034 Options Database Keys: 6035 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 6036 6037 Level: beginner 6038 6039 Notes: 6040 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values; 6041 in this case the values associated with the rows and columns one passes in are set to zero 6042 in the matrix. 6043 6044 MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type.
In this no 6045 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 6046 6047 .seealso: MatCreateAIJ() 6048 M*/ 6049 6050 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6051 { 6052 Mat_MPIAIJ *b; 6053 PetscErrorCode ierr; 6054 PetscMPIInt size; 6055 6056 PetscFunctionBegin; 6057 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 6058 6059 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 6060 B->data = (void*)b; 6061 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 6062 B->assembled = PETSC_FALSE; 6063 B->insertmode = NOT_SET_VALUES; 6064 b->size = size; 6065 6066 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr); 6067 6068 /* build cache for off array entries formed */ 6069 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 6070 6071 b->donotstash = PETSC_FALSE; 6072 b->colmap = NULL; 6073 b->garray = NULL; 6074 b->roworiented = PETSC_TRUE; 6075 6076 /* stuff used for matrix vector multiply */ 6077 b->lvec = NULL; 6078 b->Mvctx = NULL; 6079 6080 /* stuff for MatGetRow() */ 6081 b->rowindices = NULL; 6082 b->rowvalues = NULL; 6083 b->getrowactive = PETSC_FALSE; 6084 6085 /* flexible pointer used in CUSPARSE classes */ 6086 b->spptr = NULL; 6087 6088 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 6089 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 6090 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 6091 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 6092 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 6093 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 6094 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 6095 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 6096 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 6097 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr); 6098 #if defined(PETSC_HAVE_CUDA) 6099 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr); 6100 #endif 6101 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6102 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr); 6103 #endif 6104 #if defined(PETSC_HAVE_MKL_SPARSE) 6105 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 6106 #endif 6107 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 6108 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr); 6109 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 6110 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);CHKERRQ(ierr); 6111 #if defined(PETSC_HAVE_ELEMENTAL) 6112 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 6113 #endif 6114 #if defined(PETSC_HAVE_SCALAPACK) 6115 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr); 6116 #endif 6117 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 6118 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 6119 #if defined(PETSC_HAVE_HYPRE) 6120 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 6121 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr); 6122 #endif 6123 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr); 6124 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr); 6125 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 6126 PetscFunctionReturn(0); 6127 } 6128 6129 /*@C 6130 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 6131 and "off-diagonal" part of the matrix in CSR format. 6132 6133 Collective 6134 6135 Input Parameters: 6136 + comm - MPI communicator 6137 . m - number of local rows (Cannot be PETSC_DECIDE) 6138 . n - This value should be the same as the local size used in creating the 6139 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 6140 calculated if N is given) For square matrices n is almost always m. 6141 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 6142 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 6143 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6144 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6145 . a - matrix values 6146 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6147 . oj - column indices, which must be global, representing global columns in the MPIAIJ matrix 6148 - oa - matrix values 6149 6150 Output Parameter: 6151 . mat - the matrix 6152 6153 Level: advanced 6154 6155 Notes: 6156 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6157 must free the arrays once the matrix has been destroyed and not before. 6158 6159 The i and j indices are 0 based 6160 6161 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6162 6163 This sets local rows and cannot be used to set off-processor values. 6164 6165 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6166 legacy application natively assembles into exactly this split format. 
The code to do so is nontrivial and does 6167 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6168 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6169 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6170 communication if it is known that only local entries will be set. 6171 6172 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 6173 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 6174 @*/ 6175 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 6176 { 6177 PetscErrorCode ierr; 6178 Mat_MPIAIJ *maij; 6179 6180 PetscFunctionBegin; 6181 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6182 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 6183 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 6184 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 6185 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 6186 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 6187 maij = (Mat_MPIAIJ*) (*mat)->data; 6188 6189 (*mat)->preallocated = PETSC_TRUE; 6190 6191 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 6192 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 6193 6194 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 6195 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 6196 6197 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 6198 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6199 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6200 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 6201 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6202 PetscFunctionReturn(0); 6203 } 6204 6205 /* 6206 Special version for direct calls from Fortran 6207 */ 6208 #include <petsc/private/fortranimpl.h> 6209 6210 /* Change these macros so can be used in void function */ 6211 #undef CHKERRQ 6212 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 6213 #undef SETERRQ2 6214 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 6215 #undef SETERRQ3 6216 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 6217 #undef SETERRQ 6218 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 6219 6220 #if defined(PETSC_HAVE_FORTRAN_CAPS) 6221 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 6222 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 6223 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 6224 #else 6225 #endif 6226 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 6227 { 6228 Mat mat = *mmat; 6229 PetscInt m = *mm, n = *mn; 6230 InsertMode addv = *maddv; 6231 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 6232 PetscScalar value; 6233 PetscErrorCode ierr; 6234 6235 MatCheckPreallocated(mat,1); 6236 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 6237 else if (mat->insertmode != addv) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 6238 { 6239 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 6240 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 6241 PetscBool roworiented = aij->roworiented; 6242 6243 /* Some Variables required in the macro */ 6244 Mat A = aij->A; 6245 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 6246 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 6247 MatScalar *aa = a->a; 6248 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 6249 Mat B = aij->B; 6250 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 6251 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 6252 MatScalar *ba = b->a; 6253 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 6254 * cannot use "#if defined" inside a macro. */ 6255 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 6256 6257 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 6258 PetscInt nonew = a->nonew; 6259 MatScalar *ap1,*ap2; 6260 6261 PetscFunctionBegin; 6262 for (i=0; i<m; i++) { 6263 if (im[i] < 0) continue; 6264 if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 6265 if (im[i] >= rstart && im[i] < rend) { 6266 row = im[i] - rstart; 6267 lastcol1 = -1; 6268 rp1 = aj + ai[row]; 6269 ap1 = aa + ai[row]; 6270 rmax1 = aimax[row]; 6271 nrow1 = ailen[row]; 6272 low1 = 0; 6273 high1 = nrow1; 6274 lastcol2 = -1; 6275 rp2 = bj + bi[row]; 6276 ap2 = ba + bi[row]; 6277 rmax2 = bimax[row]; 6278 nrow2 = bilen[row]; 6279 low2 = 0; 6280 high2 = nrow2; 6281 6282 for (j=0; j<n; j++) { 6283 if (roworiented) value = v[i*n+j]; 6284 else value = v[i+j*m]; 6285 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 6286 if (in[j] >= cstart && in[j] < cend) { 6287 col = in[j] - cstart; 6288 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 6289 #if defined(PETSC_HAVE_DEVICE) 6290 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 6291 #endif 6292 } else if (in[j] < 0) continue; 6293 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 6294 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */ 6295 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 6296 } else { 6297 if (mat->was_assembled) { 6298 if (!aij->colmap) { 6299 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 6300 } 6301 #if defined(PETSC_USE_CTABLE) 6302 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 6303 col--; 6304 #else 6305 col = aij->colmap[in[j]] - 1; 6306 #endif 6307 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 6308 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 6309 col = in[j]; 6310 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 6311 B = aij->B; 6312 b = (Mat_SeqAIJ*)B->data; 6313 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 6314 rp2 = bj + bi[row]; 6315 ap2 = ba + bi[row]; 6316 rmax2 = bimax[row]; 6317 nrow2 = bilen[row]; 6318 low2 = 0; 6319 high2 = nrow2; 6320 bm = aij->B->rmap->n; 6321 ba = b->a; 6322 inserted = PETSC_FALSE; 6323 } 6324 } else col = in[j]; 6325 
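/* At this point col holds the column index to pass to the off-diagonal block B: a local index obtained
   through aij->colmap when the matrix had already been assembled, or the global column index when the
   matrix was never assembled (or was just disassembled by MatDisAssemble_MPIAIJ() above). */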
MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 6326 #if defined(PETSC_HAVE_DEVICE) 6327 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 6328 #endif 6329 } 6330 } 6331 } else if (!aij->donotstash) { 6332 if (roworiented) { 6333 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6334 } else { 6335 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6336 } 6337 } 6338 } 6339 } 6340 PetscFunctionReturnVoid(); 6341 } 6342 6343 typedef struct { 6344 Mat *mp; /* intermediate products */ 6345 PetscBool *mptmp; /* is the intermediate product temporary ? */ 6346 PetscInt cp; /* number of intermediate products */ 6347 6348 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6349 PetscInt *startsj_s,*startsj_r; 6350 PetscScalar *bufa; 6351 Mat P_oth; 6352 6353 /* may take advantage of merging product->B */ 6354 Mat Bloc; /* B-local by merging diag and off-diag */ 6355 6356 /* cusparse does not have support to split between symbolic and numeric phases. 6357 When api_user is true, we don't need to update the numerical values 6358 of the temporary storage */ 6359 PetscBool reusesym; 6360 6361 /* support for COO values insertion */ 6362 PetscScalar *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6363 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6364 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6365 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */ 6366 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6367 PetscMemType mtype; 6368 6369 /* customization */ 6370 PetscBool abmerge; 6371 PetscBool P_oth_bind; 6372 } MatMatMPIAIJBACKEND; 6373 6374 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6375 { 6376 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data; 6377 PetscInt i; 6378 PetscErrorCode ierr; 6379 6380 PetscFunctionBegin; 6381 ierr = PetscFree2(mmdata->startsj_s,mmdata->startsj_r);CHKERRQ(ierr); 6382 ierr = PetscFree(mmdata->bufa);CHKERRQ(ierr); 6383 ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v);CHKERRQ(ierr); 6384 ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w);CHKERRQ(ierr); 6385 ierr = MatDestroy(&mmdata->P_oth);CHKERRQ(ierr); 6386 ierr = MatDestroy(&mmdata->Bloc);CHKERRQ(ierr); 6387 ierr = PetscSFDestroy(&mmdata->sf);CHKERRQ(ierr); 6388 for (i = 0; i < mmdata->cp; i++) { 6389 ierr = MatDestroy(&mmdata->mp[i]);CHKERRQ(ierr); 6390 } 6391 ierr = PetscFree2(mmdata->mp,mmdata->mptmp);CHKERRQ(ierr); 6392 ierr = PetscFree(mmdata->own[0]);CHKERRQ(ierr); 6393 ierr = PetscFree(mmdata->own);CHKERRQ(ierr); 6394 ierr = PetscFree(mmdata->off[0]);CHKERRQ(ierr); 6395 ierr = PetscFree(mmdata->off);CHKERRQ(ierr); 6396 ierr = PetscFree(mmdata);CHKERRQ(ierr); 6397 PetscFunctionReturn(0); 6398 } 6399 6400 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 6401 { 6402 PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]); 6403 PetscErrorCode ierr; 6404 6405 PetscFunctionBegin; 6406 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f);CHKERRQ(ierr); 6407 if (f) { 6408 ierr = (*f)(A,n,idx,v);CHKERRQ(ierr); 6409 } else { 6410 const PetscScalar *vv; 6411 6412 ierr = 
MatSeqAIJGetArrayRead(A,&vv);CHKERRQ(ierr); 6413 if (n && idx) { 6414 PetscScalar *w = v; 6415 const PetscInt *oi = idx; 6416 PetscInt j; 6417 6418 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 6419 } else { 6420 ierr = PetscArraycpy(v,vv,n);CHKERRQ(ierr); 6421 } 6422 ierr = MatSeqAIJRestoreArrayRead(A,&vv);CHKERRQ(ierr); 6423 } 6424 PetscFunctionReturn(0); 6425 } 6426 6427 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 6428 { 6429 MatMatMPIAIJBACKEND *mmdata; 6430 PetscInt i,n_d,n_o; 6431 PetscErrorCode ierr; 6432 6433 PetscFunctionBegin; 6434 MatCheckProduct(C,1); 6435 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 6436 mmdata = (MatMatMPIAIJBACKEND*)C->product->data; 6437 if (!mmdata->reusesym) { /* update temporary matrices */ 6438 if (mmdata->P_oth) { 6439 ierr = MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 6440 } 6441 if (mmdata->Bloc) { 6442 ierr = MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc);CHKERRQ(ierr); 6443 } 6444 } 6445 mmdata->reusesym = PETSC_FALSE; 6446 6447 for (i = 0; i < mmdata->cp; i++) { 6448 if (!mmdata->mp[i]->ops->productnumeric) SETERRQ1(PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]); 6449 ierr = (*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]);CHKERRQ(ierr); 6450 } 6451 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 6452 PetscInt noff = mmdata->off[i+1] - mmdata->off[i]; 6453 6454 if (mmdata->mptmp[i]) continue; 6455 if (noff) { 6456 PetscInt nown = mmdata->own[i+1] - mmdata->own[i]; 6457 6458 ierr = MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o);CHKERRQ(ierr); 6459 ierr = MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d);CHKERRQ(ierr); 6460 n_o += noff; 6461 n_d += nown; 6462 } else { 6463 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data; 6464 6465 ierr = MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d);CHKERRQ(ierr); 6466 n_d += mm->nz; 6467 } 6468 } 6469 if (mmdata->hasoffproc) { /* offprocess insertion */ 6470 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr); 6471 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr); 6472 } 6473 ierr = MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES);CHKERRQ(ierr); 6474 PetscFunctionReturn(0); 6475 } 6476 6477 /* Support for Pt * A, A * P, or Pt * A * P */ 6478 #define MAX_NUMBER_INTERMEDIATE 4 6479 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 6480 { 6481 Mat_Product *product = C->product; 6482 Mat A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 6483 Mat_MPIAIJ *a,*p; 6484 MatMatMPIAIJBACKEND *mmdata; 6485 ISLocalToGlobalMapping P_oth_l2g = NULL; 6486 IS glob = NULL; 6487 const char *prefix; 6488 char pprefix[256]; 6489 const PetscInt *globidx,*P_oth_idx; 6490 PetscInt i,j,cp,m,n,M,N,ncoo,ncoo_d,ncoo_o,ncoo_oown,*coo_i,*coo_j; 6491 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. 
*/ 6492 /* type-0: consecutive, start from 0; type-1: consecutive with */ 6493 /* a base offset; type-2: sparse with a local to global map table */ 6494 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 6495 6496 MatProductType ptype; 6497 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk; 6498 PetscMPIInt size; 6499 PetscErrorCode ierr; 6500 6501 PetscFunctionBegin; 6502 MatCheckProduct(C,1); 6503 if (product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 6504 ptype = product->type; 6505 if (product->A->symmetric && ptype == MATPRODUCT_AtB) ptype = MATPRODUCT_AB; 6506 switch (ptype) { 6507 case MATPRODUCT_AB: 6508 A = product->A; 6509 P = product->B; 6510 m = A->rmap->n; 6511 n = P->cmap->n; 6512 M = A->rmap->N; 6513 N = P->cmap->N; 6514 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 6515 break; 6516 case MATPRODUCT_AtB: 6517 P = product->A; 6518 A = product->B; 6519 m = P->cmap->n; 6520 n = A->cmap->n; 6521 M = P->cmap->N; 6522 N = A->cmap->N; 6523 hasoffproc = PETSC_TRUE; 6524 break; 6525 case MATPRODUCT_PtAP: 6526 A = product->A; 6527 P = product->B; 6528 m = P->cmap->n; 6529 n = P->cmap->n; 6530 M = P->cmap->N; 6531 N = P->cmap->N; 6532 hasoffproc = PETSC_TRUE; 6533 break; 6534 default: 6535 SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 6536 } 6537 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)C),&size);CHKERRMPI(ierr); 6538 if (size == 1) hasoffproc = PETSC_FALSE; 6539 6540 /* defaults */ 6541 for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) { 6542 mp[i] = NULL; 6543 mptmp[i] = PETSC_FALSE; 6544 rmapt[i] = -1; 6545 cmapt[i] = -1; 6546 rmapa[i] = NULL; 6547 cmapa[i] = NULL; 6548 } 6549 6550 /* customization */ 6551 ierr = PetscNew(&mmdata);CHKERRQ(ierr); 6552 mmdata->reusesym = product->api_user; 6553 if (ptype == MATPRODUCT_AB) { 6554 if (product->api_user) { 6555 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr); 6556 ierr = PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr); 6557 ierr = PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6558 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6559 } else { 6560 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr); 6561 ierr = PetscOptionsBool("-matproduct_ab_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr); 6562 ierr = PetscOptionsBool("-matproduct_ab_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6563 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6564 } 6565 } else if (ptype == MATPRODUCT_PtAP) { 6566 if (product->api_user) { 6567 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr); 6568 ierr = PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6569 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6570 } else { 6571 ierr = 
PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr); 6572 ierr = PetscOptionsBool("-matproduct_ptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6573 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6574 } 6575 } 6576 a = (Mat_MPIAIJ*)A->data; 6577 p = (Mat_MPIAIJ*)P->data; 6578 ierr = MatSetSizes(C,m,n,M,N);CHKERRQ(ierr); 6579 ierr = PetscLayoutSetUp(C->rmap);CHKERRQ(ierr); 6580 ierr = PetscLayoutSetUp(C->cmap);CHKERRQ(ierr); 6581 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 6582 ierr = MatGetOptionsPrefix(C,&prefix);CHKERRQ(ierr); 6583 6584 cp = 0; 6585 switch (ptype) { 6586 case MATPRODUCT_AB: /* A * P */ 6587 ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 6588 6589 /* A_diag * P_local (merged or not) */ 6590 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 6591 /* P is product->B */ 6592 ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 6593 ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6594 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6595 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6596 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6597 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6598 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6599 mp[cp]->product->api_user = product->api_user; 6600 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6601 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6602 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6603 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6604 rmapt[cp] = 1; 6605 cmapt[cp] = 2; 6606 cmapa[cp] = globidx; 6607 mptmp[cp] = PETSC_FALSE; 6608 cp++; 6609 } else { /* A_diag * P_diag and A_diag * P_off */ 6610 ierr = MatProductCreate(a->A,p->A,NULL,&mp[cp]);CHKERRQ(ierr); 6611 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6612 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6613 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6614 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6615 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6616 mp[cp]->product->api_user = product->api_user; 6617 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6618 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6619 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6620 rmapt[cp] = 1; 6621 cmapt[cp] = 1; 6622 mptmp[cp] = PETSC_FALSE; 6623 cp++; 6624 ierr = MatProductCreate(a->A,p->B,NULL,&mp[cp]);CHKERRQ(ierr); 6625 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6626 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6627 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6628 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6629 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6630 mp[cp]->product->api_user = product->api_user; 6631 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 
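      /* This second intermediate product is A_diag * P_off: its rows already coincide with the local rows of C
         (type-1 row map), while its columns are off-process columns of P that must be translated back to global
         indices through p->garray (type-2 column map) when the COO pattern for C is assembled below. */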
6632 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6633 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6634 rmapt[cp] = 1; 6635 cmapt[cp] = 2; 6636 cmapa[cp] = p->garray; 6637 mptmp[cp] = PETSC_FALSE; 6638 cp++; 6639 } 6640 6641 /* A_off * P_other */ 6642 if (mmdata->P_oth) { 6643 ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); /* make P_oth use local col ids */ 6644 ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 6645 ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr); 6646 ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr); 6647 ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr); 6648 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6649 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6650 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6651 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6652 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6653 mp[cp]->product->api_user = product->api_user; 6654 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6655 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6656 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6657 rmapt[cp] = 1; 6658 cmapt[cp] = 2; 6659 cmapa[cp] = P_oth_idx; 6660 mptmp[cp] = PETSC_FALSE; 6661 cp++; 6662 } 6663 break; 6664 6665 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 6666 /* A is product->B */ 6667 ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 6668 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 6669 ierr = MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6670 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6671 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6672 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6673 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6674 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6675 mp[cp]->product->api_user = product->api_user; 6676 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6677 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6678 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6679 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6680 rmapt[cp] = 2; 6681 rmapa[cp] = globidx; 6682 cmapt[cp] = 2; 6683 cmapa[cp] = globidx; 6684 mptmp[cp] = PETSC_FALSE; 6685 cp++; 6686 } else { 6687 ierr = MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6688 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6689 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6690 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6691 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6692 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6693 mp[cp]->product->api_user = product->api_user; 6694 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6695 if (!mp[cp]->ops->productsymbolic) 
SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6696 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6697 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6698 rmapt[cp] = 1; 6699 cmapt[cp] = 2; 6700 cmapa[cp] = globidx; 6701 mptmp[cp] = PETSC_FALSE; 6702 cp++; 6703 ierr = MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6704 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6705 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6706 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6707 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6708 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6709 mp[cp]->product->api_user = product->api_user; 6710 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6711 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6712 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6713 rmapt[cp] = 2; 6714 rmapa[cp] = p->garray; 6715 cmapt[cp] = 2; 6716 cmapa[cp] = globidx; 6717 mptmp[cp] = PETSC_FALSE; 6718 cp++; 6719 } 6720 break; 6721 case MATPRODUCT_PtAP: 6722 ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 6723 /* P is product->B */ 6724 ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 6725 ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6726 ierr = MatProductSetType(mp[cp],MATPRODUCT_PtAP);CHKERRQ(ierr); 6727 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6728 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6729 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6730 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6731 mp[cp]->product->api_user = product->api_user; 6732 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6733 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6734 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6735 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6736 rmapt[cp] = 2; 6737 rmapa[cp] = globidx; 6738 cmapt[cp] = 2; 6739 cmapa[cp] = globidx; 6740 mptmp[cp] = PETSC_FALSE; 6741 cp++; 6742 if (mmdata->P_oth) { 6743 ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); 6744 ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 6745 ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr); 6746 ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr); 6747 ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr); 6748 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6749 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6750 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6751 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6752 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6753 mp[cp]->product->api_user = product->api_user; 6754 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6755 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing 
symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6756 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6757 mptmp[cp] = PETSC_TRUE; 6758 cp++; 6759 ierr = MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]);CHKERRQ(ierr); 6760 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6761 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6762 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6763 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6764 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6765 mp[cp]->product->api_user = product->api_user; 6766 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6767 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6768 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6769 rmapt[cp] = 2; 6770 rmapa[cp] = globidx; 6771 cmapt[cp] = 2; 6772 cmapa[cp] = P_oth_idx; 6773 mptmp[cp] = PETSC_FALSE; 6774 cp++; 6775 } 6776 break; 6777 default: 6778 SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 6779 } 6780 /* sanity check */ 6781 if (size > 1) for (i = 0; i < cp; i++) if (rmapt[i] == 2 && !hasoffproc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %D",i); 6782 6783 ierr = PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp);CHKERRQ(ierr); 6784 for (i = 0; i < cp; i++) { 6785 mmdata->mp[i] = mp[i]; 6786 mmdata->mptmp[i] = mptmp[i]; 6787 } 6788 mmdata->cp = cp; 6789 C->product->data = mmdata; 6790 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 6791 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 6792 6793 /* memory type */ 6794 mmdata->mtype = PETSC_MEMTYPE_HOST; 6795 ierr = PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");CHKERRQ(ierr); 6796 ierr = PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"");CHKERRQ(ierr); 6797 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 6798 // enable the line below MatSeqAIJCopySubArray_SeqAIJKokkos is implemented 6799 //else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_DEVICE; 6800 6801 /* prepare coo coordinates for values insertion */ 6802 6803 /* count total nonzeros of those intermediate seqaij Mats 6804 ncoo_d: # of nonzeros of matrices that do not have offproc entries 6805 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 6806 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 6807 */ 6808 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 6809 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 6810 if (mptmp[cp]) continue; 6811 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 6812 const PetscInt *rmap = rmapa[cp]; 6813 const PetscInt mr = mp[cp]->rmap->n; 6814 const PetscInt rs = C->rmap->rstart; 6815 const PetscInt re = C->rmap->rend; 6816 const PetscInt *ii = mm->i; 6817 for (i = 0; i < mr; i++) { 6818 const PetscInt gr = rmap[i]; 6819 const PetscInt nz = ii[i+1] - ii[i]; 6820 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 6821 else ncoo_oown += nz; /* this row is local */ 6822 } 6823 } else ncoo_d += mm->nz; 6824 } 6825 6826 /* 6827 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 
6828 6829 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 6830 6831 off[0] points to a big index array, which is shared by off[1,2,...]. Similarily, for own[0]. 6832 6833 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 6834 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 6835 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 6836 6837 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 6838 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaing part stores i of nonzeros I will receive. 6839 */ 6840 ierr = PetscCalloc1(mmdata->cp+1,&mmdata->off);CHKERRQ(ierr); /* +1 to make a csr-like data structure */ 6841 ierr = PetscCalloc1(mmdata->cp+1,&mmdata->own);CHKERRQ(ierr); 6842 6843 /* gather (i,j) of nonzeros inserted by remote procs */ 6844 if (hasoffproc) { 6845 PetscSF msf; 6846 PetscInt ncoo2,*coo_i2,*coo_j2; 6847 6848 ierr = PetscMalloc1(ncoo_o,&mmdata->off[0]);CHKERRQ(ierr); 6849 ierr = PetscMalloc1(ncoo_oown,&mmdata->own[0]);CHKERRQ(ierr); 6850 ierr = PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j);CHKERRQ(ierr); /* to collect (i,j) of entries to be sent to others */ 6851 6852 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 6853 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 6854 PetscInt *idxoff = mmdata->off[cp]; 6855 PetscInt *idxown = mmdata->own[cp]; 6856 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 6857 const PetscInt *rmap = rmapa[cp]; 6858 const PetscInt *cmap = cmapa[cp]; 6859 const PetscInt *ii = mm->i; 6860 PetscInt *coi = coo_i + ncoo_o; 6861 PetscInt *coj = coo_j + ncoo_o; 6862 const PetscInt mr = mp[cp]->rmap->n; 6863 const PetscInt rs = C->rmap->rstart; 6864 const PetscInt re = C->rmap->rend; 6865 const PetscInt cs = C->cmap->rstart; 6866 for (i = 0; i < mr; i++) { 6867 const PetscInt *jj = mm->j + ii[i]; 6868 const PetscInt gr = rmap[i]; 6869 const PetscInt nz = ii[i+1] - ii[i]; 6870 if (gr < rs || gr >= re) { /* this is an offproc row */ 6871 for (j = ii[i]; j < ii[i+1]; j++) { 6872 *coi++ = gr; 6873 *idxoff++ = j; 6874 } 6875 if (!cmapt[cp]) { /* already global */ 6876 for (j = 0; j < nz; j++) *coj++ = jj[j]; 6877 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 6878 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 6879 } else { /* offdiag */ 6880 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 6881 } 6882 ncoo_o += nz; 6883 } else { /* this is a local row */ 6884 for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j; 6885 } 6886 } 6887 } 6888 mmdata->off[cp + 1] = idxoff; 6889 mmdata->own[cp + 1] = idxown; 6890 } 6891 6892 ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr); 6893 ierr = PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i);CHKERRQ(ierr); 6894 ierr = PetscSFGetMultiSF(mmdata->sf,&msf);CHKERRQ(ierr); 6895 ierr = PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL);CHKERRQ(ierr); 6896 ncoo = ncoo_d + ncoo_oown + ncoo2; 6897 ierr = PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2);CHKERRQ(ierr); 6898 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); /* put (i,j) of remote nonzeros at back */ 6899 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 6900 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + 
ncoo_oown);CHKERRQ(ierr); 6901 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 6902 ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr); 6903 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 6904 ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w);CHKERRQ(ierr); 6905 coo_i = coo_i2; 6906 coo_j = coo_j2; 6907 } else { /* no offproc values insertion */ 6908 ncoo = ncoo_d; 6909 ierr = PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j);CHKERRQ(ierr); 6910 6911 ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr); 6912 ierr = PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER);CHKERRQ(ierr); 6913 ierr = PetscSFSetUp(mmdata->sf);CHKERRQ(ierr); 6914 } 6915 mmdata->hasoffproc = hasoffproc; 6916 6917 /* gather (i,j) of nonzeros inserted locally */ 6918 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 6919 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 6920 PetscInt *coi = coo_i + ncoo_d; 6921 PetscInt *coj = coo_j + ncoo_d; 6922 const PetscInt *jj = mm->j; 6923 const PetscInt *ii = mm->i; 6924 const PetscInt *cmap = cmapa[cp]; 6925 const PetscInt *rmap = rmapa[cp]; 6926 const PetscInt mr = mp[cp]->rmap->n; 6927 const PetscInt rs = C->rmap->rstart; 6928 const PetscInt re = C->rmap->rend; 6929 const PetscInt cs = C->cmap->rstart; 6930 6931 if (mptmp[cp]) continue; 6932 if (rmapt[cp] == 1) { /* consecutive rows */ 6933 /* fill coo_i */ 6934 for (i = 0; i < mr; i++) { 6935 const PetscInt gr = i + rs; 6936 for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr; 6937 } 6938 /* fill coo_j */ 6939 if (!cmapt[cp]) { /* type-0, already global */ 6940 ierr = PetscArraycpy(coj,jj,mm->nz);CHKERRQ(ierr); 6941 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 6942 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 6943 } else { /* type-2, local to global for sparse columns */ 6944 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 6945 } 6946 ncoo_d += mm->nz; 6947 } else if (rmapt[cp] == 2) { /* sparse rows */ 6948 for (i = 0; i < mr; i++) { 6949 const PetscInt *jj = mm->j + ii[i]; 6950 const PetscInt gr = rmap[i]; 6951 const PetscInt nz = ii[i+1] - ii[i]; 6952 if (gr >= rs && gr < re) { /* local rows */ 6953 for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr; 6954 if (!cmapt[cp]) { /* type-0, already global */ 6955 for (j = 0; j < nz; j++) *coj++ = jj[j]; 6956 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 6957 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 6958 } else { /* type-2, local to global for sparse columns */ 6959 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 6960 } 6961 ncoo_d += nz; 6962 } 6963 } 6964 } 6965 } 6966 if (glob) { 6967 ierr = ISRestoreIndices(glob,&globidx);CHKERRQ(ierr); 6968 } 6969 ierr = ISDestroy(&glob);CHKERRQ(ierr); 6970 if (P_oth_l2g) { 6971 ierr = ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 6972 } 6973 ierr = ISLocalToGlobalMappingDestroy(&P_oth_l2g);CHKERRQ(ierr); 6974 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 6975 ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v);CHKERRQ(ierr); 6976 6977 /* preallocate with COO data */ 6978 ierr = MatSetPreallocationCOO(C,ncoo,coo_i,coo_j);CHKERRQ(ierr); 6979 ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr); 6980 PetscFunctionReturn(0); 6981 } 6982 6983 PetscErrorCode 
MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 6984 { 6985 Mat_Product *product = mat->product; 6986 PetscErrorCode ierr; 6987 #if defined(PETSC_HAVE_DEVICE) 6988 PetscBool match = PETSC_FALSE; 6989 PetscBool usecpu = PETSC_FALSE; 6990 #else 6991 PetscBool match = PETSC_TRUE; 6992 #endif 6993 6994 PetscFunctionBegin; 6995 MatCheckProduct(mat,1); 6996 #if defined(PETSC_HAVE_DEVICE) 6997 if (!product->A->boundtocpu && !product->B->boundtocpu) { 6998 ierr = PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match);CHKERRQ(ierr); 6999 } 7000 if (match) { /* we can always fallback to the CPU if requested */ 7001 switch (product->type) { 7002 case MATPRODUCT_AB: 7003 if (product->api_user) { 7004 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");CHKERRQ(ierr); 7005 ierr = PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7006 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7007 } else { 7008 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr); 7009 ierr = PetscOptionsBool("-matproduct_ab_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7010 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7011 } 7012 break; 7013 case MATPRODUCT_AtB: 7014 if (product->api_user) { 7015 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr); 7016 ierr = PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7017 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7018 } else { 7019 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr); 7020 ierr = PetscOptionsBool("-matproduct_atb_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7021 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7022 } 7023 break; 7024 case MATPRODUCT_PtAP: 7025 if (product->api_user) { 7026 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");CHKERRQ(ierr); 7027 ierr = PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7028 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7029 } else { 7030 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr); 7031 ierr = PetscOptionsBool("-matproduct_ptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7032 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7033 } 7034 break; 7035 default: 7036 break; 7037 } 7038 match = (PetscBool)!usecpu; 7039 } 7040 #endif 7041 if (match) { 7042 switch (product->type) { 7043 case MATPRODUCT_AB: 7044 case MATPRODUCT_AtB: 7045 case MATPRODUCT_PtAP: 7046 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7047 break; 7048 default: 7049 break; 7050 } 7051 } 7052 /* fallback to MPIAIJ ops */ 7053 if (!mat->ops->productsymbolic) { 7054 ierr = MatProductSetFromOptions_MPIAIJ(mat);CHKERRQ(ierr); 7055 } 7056 PetscFunctionReturn(0); 7057 } 7058
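/*
   Illustrative sketch (not part of this source file): how the backend product path implemented above is
   typically reached from user code. It assumes A and P are assembled MATMPIAIJ (or a device subclass such as
   MATMPIAIJCUSPARSE) matrices on the same communicator; error checking is abbreviated.

     Mat C;
     ierr = MatProductCreate(A,P,NULL,&C);CHKERRQ(ierr);         // C will hold the requested product of A and P
     ierr = MatProductSetType(C,MATPRODUCT_PtAP);CHKERRQ(ierr);  // or MATPRODUCT_AB / MATPRODUCT_AtB
     ierr = MatProductSetFromOptions(C);CHKERRQ(ierr);           // may select MatProductSymbolic_MPIAIJBACKEND()
     ierr = MatProductSymbolic(C);CHKERRQ(ierr);                 // builds the intermediate products and the COO pattern
     ierr = MatProductNumeric(C);CHKERRQ(ierr);                  // fills the values through MatSetValuesCOO()

   The api_user entry points (MatPtAP(), MatMatMult(), MatTransposeMatMult()) follow the same route. The run-time
   options parsed above, e.g. -matmatmult_backend_mergeB, -matptap_backend_pothbind and -matptap_backend_cpu,
   control the merging of P's local blocks, the binding of P_oth to the CPU, and the fallback to the MPIAIJ
   host implementation.
*/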