#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
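   Example Usage (a minimal sketch; A, comm, M, N and the preallocation values nz, dnz, onz are placeholders):
.vb
   MatCreate(comm,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
   MatSetType(A,MATAIJ);
   MatSeqAIJSetPreallocation(A,nz,NULL);           /* used when the communicator has a single process */
   MatMPIAIJSetPreallocation(A,dnz,NULL,onz,NULL); /* used when the communicator has multiple processes */
   MatSetFromOptions(A);
.ve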

   Developer Notes:
   Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) {
    ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
  }
  if (a->B) {
    ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
  }

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from that of the parent matrix. */
  if (a->lvec) {
    ierr = VecBindToCPU(a->lvec,flg);CHKERRQ(ierr);
  }
  if (a->diag) {
    ierr = VecBindToCPU(a->diag,flg);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt *ia,*ib;
  const MatScalar *aa,*bb,*aav,*bav;
  PetscInt na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia   = a->i;
  ib   = b->i;
  ierr = MatSeqAIJGetArrayRead(mat->A,&aav);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(mat->B,&bav);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = bav + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRMPI(ierr);
  if (!n0rows) {
    ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data;
  PetscBool cong;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
  if (Y->assembled && cong) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data;
  PetscInt i,m,n,*garray = aij->garray;
  Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
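  /* The get/restore pairs below are used only for their side effect: they make sure the host copies of the
     values of aij->A and aij->B are up to date before the raw a_aij->a and b_aij->a arrays are read directly
     in the loops that follow. */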
  ierr = MatSeqAIJGetArrayRead(aij->A,&dummy);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(aij->A,&dummy);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(aij->B,&dummy);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(aij->B,&dummy);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
    }
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  } else {
    ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  IS sis,gis;
  PetscErrorCode ierr;
  const PetscInt *isis,*igis;
  PetscInt n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable,
  at a slightly higher hash table cost; without it, it is not scalable
  (each process holds an order-N integer array) but is fast to access.
*/
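/*
  For example (illustrative values): if aij->garray = {3,8,11}, then global column 8 corresponds to local
  column 1 of the off-diagonal block B. With PETSC_USE_CTABLE the pair (8+1,1+1) is stored in the table,
  otherwise colmap[8] = 2; in both cases the stored values are shifted by one so that 0 can mean "not present".
*/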
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  PetscCheckFalse(n && !aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure whether LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    PetscCheckFalse(nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++;\
    a_noinsert: ; \
    ailen[row] = nrow1; \
  }

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } \
        else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    PetscCheckFalse(nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
  }

PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt l,*garray = mat->garray,diag;
  PetscScalar *aa,*ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */
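  /* The input array v is assumed to hold the whole row in global column order: first the entries to the
     left of the diagonal block (stored in B), then the diagonal-block entries (stored in A), then the
     entries to the right of the diagonal block (also stored in B). */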

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  if (l) {
    ierr = MatSeqAIJGetArray(mat->B,&ba);CHKERRQ(ierr);
    ierr = PetscArraycpy(ba+b->i[row],v,l);CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArray(mat->B,&ba);CHKERRQ(ierr);
  }

  /* diagonal part */
  if (a->i[row+1]-a->i[row]) {
    ierr = MatSeqAIJGetArray(mat->A,&aa);CHKERRQ(ierr);
    ierr = PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArray(mat->A,&aa);CHKERRQ(ierr);
  }

  /* right of diagonal part */
  if (b->i[row+1]-b->i[row]-l) {
    ierr = MatSeqAIJGetArray(mat->B,&ba);CHKERRQ(ierr);
    ierr = PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArray(mat->B,&ba);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar value = 0.0;
  PetscErrorCode ierr;
  PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscBool ignorezeroentries = a->ignorezeroentries;
  Mat B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar *aa,*ba;
  PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  ierr = MatSeqAIJGetArray(A,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(B,&ba);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
    PetscCheckFalse(im[i] >= mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) value = roworiented ? v[i*n+j] : v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
        else PetscCheckFalse(in[j] >= mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); /* Change aij->B from reduced/local format to expanded/global format */
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      PetscCheckFalse(mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  ierr = MatSeqAIJRestoreArray(A,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(B,&ba);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat A = aij->A; /* diagonal part of the matrix */
  Mat B = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt *ailen = a->ilen,*aj = a->j;
  PetscInt *bilen = b->ilen,*bj = b->j;
  PetscInt am = aij->A->rmap->n,j;
  PetscInt diag_so_far = 0,dnz;
  PetscInt offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not hold and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat A = aij->A; /* diagonal part of the matrix */
  Mat B = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *aijd = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt *ailen = a->ilen,*aj = a->j;
  PetscInt *bilen = b->ilen,*bj = b->j;
  PetscInt am = aij->A->rmap->n,j;
  PetscInt *full_diag_i = aijd->i,*full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
  PetscScalar *aa = a->a,*ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag+dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd+onz_row] = mat_j[col];
        ba[rowstart_offd+onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %" PetscInt_FMT,idxm[i]);*/
    PetscCheckFalse(idxm[i] >= mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %" PetscInt_FMT,idxn[j]); */
        PetscCheckFalse(idxn[j] >= mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
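          /* a negative local column, or a garray entry that does not match idxn[j], means this process
             stores no entry at (idxm[i],idxn[j]), so the value returned is zero */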
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscMPIInt n;
  PetscInt i,j,rstart,ncols,flg;
  PetscInt *row,*col;
  PetscBool other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
  }
#endif
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = NULL;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;
  PetscInt *lrows;
  PetscInt r, len;
  PetscBool cong, lch, gch;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar *bb;

    PetscCheckFalse(!cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }

  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt nnwA, nnwB;
    PetscBool nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
      aijA->nonew = 0;
    }
    if (!nnzB) {
      ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt n = A->rmap->n;
  PetscInt i,j,r,m,len = 0;
  PetscInt *lrows,*owners = A->rmap->range;
  PetscMPIInt p = 0;
  PetscSFNode *rrows;
  PetscSF sf;
  const PetscScalar *xx;
  PetscScalar *bb,*mask,*aij_a;
  Vec xmask,lmask;
  Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt *aj, *ii,*ridx;
  PetscScalar *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheckFalse(idx < 0 || A->rmap->N <= idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off-diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x && b) { /* this code is buggy when the row and column layouts don't match */
    PetscBool cong;

    ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
    PetscCheckFalse(!cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off-diagonal matrix */
  ierr = MatSeqAIJGetArray(l->B,&aij_a);CHKERRQ(ierr);
  ii   = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
  }
  /* loop over all elements of off-process part of matrix, zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = MatSeqAIJRestoreArray(l->B,&aij_a);CHKERRQ(ierr);
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt nt;
  VecScatter Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  PetscCheckFalse(nt != A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt);
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  VecScatter Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS Me,Notme;
  PetscErrorCode ierr;
  PetscInt M,N,first,last,*notme,i;
  PetscBool lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data;
  Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRMPI(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCheckFalse(A->rmap->N != A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  PetscCheckFalse(A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);

  /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
  ierr = PetscSFDestroy(&aij->coo_sf);CHKERRQ(ierr);
  ierr = PetscFree4(aij->Aperm1,aij->Bperm1,aij->Ajmap1,aij->Bjmap1);CHKERRQ(ierr);
  ierr = PetscFree4(aij->Aperm2,aij->Bperm2,aij->Ajmap2,aij->Bjmap2);CHKERRQ(ierr);
  ierr = PetscFree4(aij->Aimap1,aij->Bimap1,aij->Aimap2,aij->Bimap2);CHKERRQ(ierr);
  ierr = PetscFree2(aij->sendbuf,aij->recvbuf);CHKERRQ(ierr);
  ierr = PetscFree(aij->Cperm1);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data;
  const PetscInt *garray = aij->garray;
  const PetscScalar *aa,*ba;
  PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
  PetscInt *rowlens;
  PetscInt *colidxs;
  PetscScalar *matvals;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M; header[2] = N; header[3] = nz;
  ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
  ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);

  /* fill in and store row lengths */
  ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
  for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscFree(rowlens);CHKERRQ(ierr);

  /* fill in and store column indices */
  ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
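  /* The merge below writes each row's column indices in increasing global order: off-diagonal (B) entries
     whose global column lies before the diagonal block come first, then the diagonal-block (A) entries,
     then the remaining off-diagonal entries. */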
  for (cnt=0, i=0; i<m; i++) {
    for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    for (ja=A->i[i]; ja<A->i[i+1]; ja++)
      colidxs[cnt++] = A->j[ja] + cs;
    for (; jb<B->i[i+1]; jb++)
      colidxs[cnt++] = garray[B->j[jb]];
  }
  PetscCheckFalse(cnt != nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
  ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscFree(colidxs);CHKERRQ(ierr);

  /* fill in and store nonzero values */
  ierr = MatSeqAIJGetArrayRead(aij->A,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(aij->B,&ba);CHKERRQ(ierr);
  ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
  for (cnt=0, i=0; i<m; i++) {
    for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      matvals[cnt++] = ba[jb];
    }
    for (ja=A->i[i]; ja<A->i[i+1]; ja++)
      matvals[cnt++] = aa[ja];
    for (; jb<B->i[i+1]; jb++)
      matvals[cnt++] = ba[jb];
  }
  ierr = MatSeqAIJRestoreArrayRead(aij->A,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(aij->B,&ba);CHKERRQ(ierr);
  PetscCheckFalse(cnt != nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
  ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
  ierr = PetscFree(matvals);CHKERRQ(ierr);

  /* write block size option to the viewer's .info file */
  ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include <petscdraw.h>
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscMPIInt rank = aij->rank,size = aij->size;
  PetscBool isdraw,iascii,isbinary;
  PetscViewer sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
      ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
      ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
      for (i=0; i<(PetscInt)size; i++) {
        nmax  = PetscMax(nmax,nz[i]);
        nmin  = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      ierr = PetscFree(nz);CHKERRQ(ierr);
      navg = navg/size;
      ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n",nmin,navg,nmax);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscInt *inodes=NULL;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS isrow,iscol;

    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
    ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
    ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
    /* The commented code uses MatCreateSubMatrices instead */
    /*
      Mat *AA, A = NULL, Av;
      IS isrow,iscol;

      ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
      ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1377 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1378 if (rank == 0) { 1379 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1380 A = AA[0]; 1381 Av = AA[0]; 1382 } 1383 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1384 */ 1385 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1386 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1387 /* 1388 Everyone has to call to draw the matrix since the graphics waits are 1389 synchronized across all processors that share the PetscDraw object 1390 */ 1391 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1392 if (rank == 0) { 1393 if (((PetscObject)mat)->name) { 1394 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1395 } 1396 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1397 } 1398 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1399 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1400 ierr = MatDestroy(&A);CHKERRQ(ierr); 1401 } 1402 PetscFunctionReturn(0); 1403 } 1404 1405 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1406 { 1407 PetscErrorCode ierr; 1408 PetscBool iascii,isdraw,issocket,isbinary; 1409 1410 PetscFunctionBegin; 1411 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1412 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1413 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1414 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1415 if (iascii || isdraw || isbinary || issocket) { 1416 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1417 } 1418 PetscFunctionReturn(0); 1419 } 1420 1421 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1422 { 1423 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1424 PetscErrorCode ierr; 1425 Vec bb1 = NULL; 1426 PetscBool hasop; 1427 1428 PetscFunctionBegin; 1429 if (flag == SOR_APPLY_UPPER) { 1430 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1431 PetscFunctionReturn(0); 1432 } 1433 1434 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1435 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1436 } 1437 1438 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1439 if (flag & SOR_ZERO_INITIAL_GUESS) { 1440 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1441 its--; 1442 } 1443 1444 while (its--) { 1445 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1446 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1447 1448 /* update rhs: bb1 = bb - B*x */ 1449 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1450 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1451 1452 /* local sweep */ 1453 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1454 } 1455 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1456 if (flag & SOR_ZERO_INITIAL_GUESS) { 1457 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1458 its--; 1459 } 1460 while (its--) { 1461 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1462 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1463 1464 /* update rhs: bb1 = bb - B*x */ 1465 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1466 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1467 1468 /* local sweep */ 1469 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1470 } 1471 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1472 if (flag & SOR_ZERO_INITIAL_GUESS) { 1473 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1474 its--; 1475 } 1476 while (its--) { 1477 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1478 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1479 1480 /* update rhs: bb1 = bb - B*x */ 1481 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1482 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1483 1484 /* local sweep */ 1485 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1486 } 1487 } else if (flag & SOR_EISENSTAT) { 1488 Vec xx1; 1489 1490 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1491 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1492 1493 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1494 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1495 if (!mat->diag) { 1496 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1497 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1498 } 1499 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1500 if (hasop) { 1501 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1502 } else { 1503 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1504 } 1505 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1506 1507 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1508 1509 /* local sweep */ 1510 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1511 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1512 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1513 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1514 1515 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1516 1517 matin->factorerrortype = mat->A->factorerrortype; 1518 PetscFunctionReturn(0); 1519 } 1520 1521 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1522 { 1523 Mat aA,aB,Aperm; 1524 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1525 PetscScalar *aa,*ba; 1526 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1527 PetscSF rowsf,sf; 1528 IS parcolp = NULL; 1529 PetscBool done; 1530 PetscErrorCode ierr; 1531 1532 PetscFunctionBegin; 1533 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1534 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1535 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1536 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1537 1538 /* Invert row permutation to find out where my rows should go */ 1539 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1540 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1541 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 
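  /* In effect this inverts the row permutation: rowsf has one leaf per local row of the
     permuted matrix, pointing at root rwant[i] (the original row it draws from).  Reducing
     work[i] = rstart + i onto the roots with MPI_REPLACE tells the owner of each original
     row which permuted position references it, so after the reduce rdest[i] holds, for
     original local row i, the row of the permuted matrix it must be sent to. */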
1542 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1543 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1544 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1545 1546 /* Invert column permutation to find out where my columns should go */ 1547 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1548 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1549 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1550 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1551 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1552 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1553 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1554 1555 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1556 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1557 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1558 1559 /* Find out where my gcols should go */ 1560 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1561 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1562 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1563 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1564 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1565 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr); 1566 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr); 1567 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1568 1569 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1570 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1571 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1572 for (i=0; i<m; i++) { 1573 PetscInt row = rdest[i]; 1574 PetscMPIInt rowner; 1575 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1576 for (j=ai[i]; j<ai[i+1]; j++) { 1577 PetscInt col = cdest[aj[j]]; 1578 PetscMPIInt cowner; 1579 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1580 if (rowner == cowner) dnnz[i]++; 1581 else onnz[i]++; 1582 } 1583 for (j=bi[i]; j<bi[i+1]; j++) { 1584 PetscInt col = gcdest[bj[j]]; 1585 PetscMPIInt cowner; 1586 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1587 if (rowner == cowner) dnnz[i]++; 1588 else onnz[i]++; 1589 } 1590 } 1591 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr); 1592 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr); 1593 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr); 1594 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr); 1595 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1596 1597 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1598 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1599 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1600 for (i=0; i<m; i++) { 1601 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1602 PetscInt j0,rowlen; 1603 rowlen = ai[i+1] - ai[i]; 1604 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1605 for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1606 ierr = 
MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1607 } 1608 rowlen = bi[i+1] - bi[i]; 1609 for (j0=j=0; j<rowlen; j0=j) { 1610 for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1611 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1612 } 1613 } 1614 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1615 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1616 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1617 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1618 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1619 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1620 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1621 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1622 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1623 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1624 *B = Aperm; 1625 PetscFunctionReturn(0); 1626 } 1627 1628 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1629 { 1630 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1631 PetscErrorCode ierr; 1632 1633 PetscFunctionBegin; 1634 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1635 if (ghosts) *ghosts = aij->garray; 1636 PetscFunctionReturn(0); 1637 } 1638 1639 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1640 { 1641 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1642 Mat A = mat->A,B = mat->B; 1643 PetscErrorCode ierr; 1644 PetscLogDouble isend[5],irecv[5]; 1645 1646 PetscFunctionBegin; 1647 info->block_size = 1.0; 1648 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1649 1650 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1651 isend[3] = info->memory; isend[4] = info->mallocs; 1652 1653 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1654 1655 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1656 isend[3] += info->memory; isend[4] += info->mallocs; 1657 if (flag == MAT_LOCAL) { 1658 info->nz_used = isend[0]; 1659 info->nz_allocated = isend[1]; 1660 info->nz_unneeded = isend[2]; 1661 info->memory = isend[3]; 1662 info->mallocs = isend[4]; 1663 } else if (flag == MAT_GLOBAL_MAX) { 1664 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr); 1665 1666 info->nz_used = irecv[0]; 1667 info->nz_allocated = irecv[1]; 1668 info->nz_unneeded = irecv[2]; 1669 info->memory = irecv[3]; 1670 info->mallocs = irecv[4]; 1671 } else if (flag == MAT_GLOBAL_SUM) { 1672 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr); 1673 1674 info->nz_used = irecv[0]; 1675 info->nz_allocated = irecv[1]; 1676 info->nz_unneeded = irecv[2]; 1677 info->memory = irecv[3]; 1678 info->mallocs = irecv[4]; 1679 } 1680 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1681 info->fill_ratio_needed = 0; 1682 info->factor_mallocs = 0; 1683 PetscFunctionReturn(0); 1684 } 1685 1686 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1687 { 1688 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1689 PetscErrorCode ierr; 1690 1691 PetscFunctionBegin; 1692 switch (op) { 1693 case MAT_NEW_NONZERO_LOCATIONS: 1694 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1695 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1696 case MAT_KEEP_NONZERO_PATTERN: 1697 case 
MAT_NEW_NONZERO_LOCATION_ERR: 1698 case MAT_USE_INODES: 1699 case MAT_IGNORE_ZERO_ENTRIES: 1700 case MAT_FORM_EXPLICIT_TRANSPOSE: 1701 MatCheckPreallocated(A,1); 1702 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1703 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1704 break; 1705 case MAT_ROW_ORIENTED: 1706 MatCheckPreallocated(A,1); 1707 a->roworiented = flg; 1708 1709 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1710 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1711 break; 1712 case MAT_FORCE_DIAGONAL_ENTRIES: 1713 case MAT_SORTED_FULL: 1714 ierr = PetscInfo(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1715 break; 1716 case MAT_IGNORE_OFF_PROC_ENTRIES: 1717 a->donotstash = flg; 1718 break; 1719 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1720 case MAT_SPD: 1721 case MAT_SYMMETRIC: 1722 case MAT_STRUCTURALLY_SYMMETRIC: 1723 case MAT_HERMITIAN: 1724 case MAT_SYMMETRY_ETERNAL: 1725 break; 1726 case MAT_SUBMAT_SINGLEIS: 1727 A->submat_singleis = flg; 1728 break; 1729 case MAT_STRUCTURE_ONLY: 1730 /* The option is handled directly by MatSetOption() */ 1731 break; 1732 default: 1733 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1734 } 1735 PetscFunctionReturn(0); 1736 } 1737 1738 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1739 { 1740 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1741 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1742 PetscErrorCode ierr; 1743 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1744 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1745 PetscInt *cmap,*idx_p; 1746 1747 PetscFunctionBegin; 1748 PetscCheckFalse(mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1749 mat->getrowactive = PETSC_TRUE; 1750 1751 if (!mat->rowvalues && (idx || v)) { 1752 /* 1753 allocate enough space to hold information from the longest row. 
1754 */ 1755 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1756 PetscInt max = 1,tmp; 1757 for (i=0; i<matin->rmap->n; i++) { 1758 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1759 if (max < tmp) max = tmp; 1760 } 1761 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1762 } 1763 1764 PetscCheckFalse(row < rstart || row >= rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1765 lrow = row - rstart; 1766 1767 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1768 if (!v) {pvA = NULL; pvB = NULL;} 1769 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1770 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1771 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1772 nztot = nzA + nzB; 1773 1774 cmap = mat->garray; 1775 if (v || idx) { 1776 if (nztot) { 1777 /* Sort by increasing column numbers, assuming A and B already sorted */ 1778 PetscInt imark = -1; 1779 if (v) { 1780 *v = v_p = mat->rowvalues; 1781 for (i=0; i<nzB; i++) { 1782 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1783 else break; 1784 } 1785 imark = i; 1786 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1787 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1788 } 1789 if (idx) { 1790 *idx = idx_p = mat->rowindices; 1791 if (imark > -1) { 1792 for (i=0; i<imark; i++) { 1793 idx_p[i] = cmap[cworkB[i]]; 1794 } 1795 } else { 1796 for (i=0; i<nzB; i++) { 1797 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1798 else break; 1799 } 1800 imark = i; 1801 } 1802 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1803 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1804 } 1805 } else { 1806 if (idx) *idx = NULL; 1807 if (v) *v = NULL; 1808 } 1809 } 1810 *nz = nztot; 1811 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1812 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1813 PetscFunctionReturn(0); 1814 } 1815 1816 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1817 { 1818 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1819 1820 PetscFunctionBegin; 1821 PetscCheckFalse(!aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1822 aij->getrowactive = PETSC_FALSE; 1823 PetscFunctionReturn(0); 1824 } 1825 1826 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1827 { 1828 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1829 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1830 PetscErrorCode ierr; 1831 PetscInt i,j,cstart = mat->cmap->rstart; 1832 PetscReal sum = 0.0; 1833 const MatScalar *v,*amata,*bmata; 1834 1835 PetscFunctionBegin; 1836 if (aij->size == 1) { 1837 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1838 } else { 1839 ierr = MatSeqAIJGetArrayRead(aij->A,&amata);CHKERRQ(ierr); 1840 ierr = MatSeqAIJGetArrayRead(aij->B,&bmata);CHKERRQ(ierr); 1841 if (type == NORM_FROBENIUS) { 1842 v = amata; 1843 for (i=0; i<amat->nz; i++) { 1844 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1845 } 1846 v = bmata; 1847 for (i=0; i<bmat->nz; i++) { 1848 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1849 } 1850 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1851 *norm = PetscSqrtReal(*norm); 1852 ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr); 1853 } else if (type == NORM_1) { /* max column norm */ 1854 PetscReal *tmp,*tmp2; 1855 
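      /* NORM_1 is the maximum column sum of absolute values, norm_1(A) = max_j sum_i |a_ij|.
         The local contributions are accumulated into a dense array tmp[] of length cmap->N
         indexed by global column (cstart + j for the diagonal block, garray[j] for the
         off-diagonal block), the per-column sums are combined with an Allreduce, and the
         largest entry of the reduced array is the norm. */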
PetscInt *jj,*garray = aij->garray; 1856 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1857 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1858 *norm = 0.0; 1859 v = amata; jj = amat->j; 1860 for (j=0; j<amat->nz; j++) { 1861 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1862 } 1863 v = bmata; jj = bmat->j; 1864 for (j=0; j<bmat->nz; j++) { 1865 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1866 } 1867 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1868 for (j=0; j<mat->cmap->N; j++) { 1869 if (tmp2[j] > *norm) *norm = tmp2[j]; 1870 } 1871 ierr = PetscFree(tmp);CHKERRQ(ierr); 1872 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1873 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1874 } else if (type == NORM_INFINITY) { /* max row norm */ 1875 PetscReal ntemp = 0.0; 1876 for (j=0; j<aij->A->rmap->n; j++) { 1877 v = amata + amat->i[j]; 1878 sum = 0.0; 1879 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1880 sum += PetscAbsScalar(*v); v++; 1881 } 1882 v = bmata + bmat->i[j]; 1883 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1884 sum += PetscAbsScalar(*v); v++; 1885 } 1886 if (sum > ntemp) ntemp = sum; 1887 } 1888 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1889 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1890 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1891 ierr = MatSeqAIJRestoreArrayRead(aij->A,&amata);CHKERRQ(ierr); 1892 ierr = MatSeqAIJRestoreArrayRead(aij->B,&bmata);CHKERRQ(ierr); 1893 } 1894 PetscFunctionReturn(0); 1895 } 1896 1897 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1898 { 1899 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 1900 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 1901 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 1902 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 1903 PetscErrorCode ierr; 1904 Mat B,A_diag,*B_diag; 1905 const MatScalar *pbv,*bv; 1906 1907 PetscFunctionBegin; 1908 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1909 ai = Aloc->i; aj = Aloc->j; 1910 bi = Bloc->i; bj = Bloc->j; 1911 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1912 PetscInt *d_nnz,*g_nnz,*o_nnz; 1913 PetscSFNode *oloc; 1914 PETSC_UNUSED PetscSF sf; 1915 1916 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1917 /* compute d_nnz for preallocation */ 1918 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 1919 for (i=0; i<ai[ma]; i++) { 1920 d_nnz[aj[i]]++; 1921 } 1922 /* compute local off-diagonal contributions */ 1923 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 1924 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1925 /* map those to global */ 1926 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1927 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1928 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1929 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 1930 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1931 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1932 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1933 1934 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1935 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1936 ierr = 
MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1937 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1938 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1939 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1940 } else { 1941 B = *matout; 1942 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1943 } 1944 1945 b = (Mat_MPIAIJ*)B->data; 1946 A_diag = a->A; 1947 B_diag = &b->A; 1948 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 1949 A_diag_ncol = A_diag->cmap->N; 1950 B_diag_ilen = sub_B_diag->ilen; 1951 B_diag_i = sub_B_diag->i; 1952 1953 /* Set ilen for diagonal of B */ 1954 for (i=0; i<A_diag_ncol; i++) { 1955 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 1956 } 1957 1958 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 1959 very quickly (=without using MatSetValues), because all writes are local. */ 1960 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 1961 1962 /* copy over the B part */ 1963 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 1964 ierr = MatSeqAIJGetArrayRead(a->B,&bv);CHKERRQ(ierr); 1965 pbv = bv; 1966 row = A->rmap->rstart; 1967 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1968 cols_tmp = cols; 1969 for (i=0; i<mb; i++) { 1970 ncol = bi[i+1]-bi[i]; 1971 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES);CHKERRQ(ierr); 1972 row++; 1973 pbv += ncol; cols_tmp += ncol; 1974 } 1975 ierr = PetscFree(cols);CHKERRQ(ierr); 1976 ierr = MatSeqAIJRestoreArrayRead(a->B,&bv);CHKERRQ(ierr); 1977 1978 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1979 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1980 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1981 *matout = B; 1982 } else { 1983 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 1984 } 1985 PetscFunctionReturn(0); 1986 } 1987 1988 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1989 { 1990 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1991 Mat a = aij->A,b = aij->B; 1992 PetscErrorCode ierr; 1993 PetscInt s1,s2,s3; 1994 1995 PetscFunctionBegin; 1996 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1997 if (rr) { 1998 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1999 PetscCheckFalse(s1!=s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2000 /* Overlap communication with computation. 
*/ 2001 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2002 } 2003 if (ll) { 2004 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2005 PetscCheckFalse(s1!=s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2006 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 2007 } 2008 /* scale the diagonal block */ 2009 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2010 2011 if (rr) { 2012 /* Do a scatter end and then right scale the off-diagonal block */ 2013 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2014 ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr); 2015 } 2016 PetscFunctionReturn(0); 2017 } 2018 2019 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2020 { 2021 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2022 PetscErrorCode ierr; 2023 2024 PetscFunctionBegin; 2025 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2026 PetscFunctionReturn(0); 2027 } 2028 2029 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2030 { 2031 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2032 Mat a,b,c,d; 2033 PetscBool flg; 2034 PetscErrorCode ierr; 2035 2036 PetscFunctionBegin; 2037 a = matA->A; b = matA->B; 2038 c = matB->A; d = matB->B; 2039 2040 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2041 if (flg) { 2042 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2043 } 2044 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 2045 PetscFunctionReturn(0); 2046 } 2047 2048 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2049 { 2050 PetscErrorCode ierr; 2051 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2052 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2053 2054 PetscFunctionBegin; 2055 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2056 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2057 /* because of the column compression in the off-processor part of the matrix a->B, 2058 the number of columns in a->B and b->B may be different, hence we cannot call 2059 the MatCopy() directly on the two parts. If need be, we can provide a more 2060 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2061 then copying the submatrices */ 2062 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2063 } else { 2064 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2065 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2066 } 2067 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2068 PetscFunctionReturn(0); 2069 } 2070 2071 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2072 { 2073 PetscErrorCode ierr; 2074 2075 PetscFunctionBegin; 2076 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr); 2077 PetscFunctionReturn(0); 2078 } 2079 2080 /* 2081 Computes the number of nonzeros per row needed for preallocation when X and Y 2082 have different nonzero structure. 
2083 */ 2084 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2085 { 2086 PetscInt i,j,k,nzx,nzy; 2087 2088 PetscFunctionBegin; 2089 /* Set the number of nonzeros in the new matrix */ 2090 for (i=0; i<m; i++) { 2091 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2092 nzx = xi[i+1] - xi[i]; 2093 nzy = yi[i+1] - yi[i]; 2094 nnz[i] = 0; 2095 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2096 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2097 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2098 nnz[i]++; 2099 } 2100 for (; k<nzy; k++) nnz[i]++; 2101 } 2102 PetscFunctionReturn(0); 2103 } 2104 2105 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2106 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2107 { 2108 PetscErrorCode ierr; 2109 PetscInt m = Y->rmap->N; 2110 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2111 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2112 2113 PetscFunctionBegin; 2114 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2115 PetscFunctionReturn(0); 2116 } 2117 2118 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2119 { 2120 PetscErrorCode ierr; 2121 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2122 2123 PetscFunctionBegin; 2124 if (str == SAME_NONZERO_PATTERN) { 2125 ierr = MatAXPY(yy->A,a,xx->A,str);CHKERRQ(ierr); 2126 ierr = MatAXPY(yy->B,a,xx->B,str);CHKERRQ(ierr); 2127 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2128 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2129 } else { 2130 Mat B; 2131 PetscInt *nnz_d,*nnz_o; 2132 2133 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2134 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2135 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2136 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2137 ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr); 2138 ierr = MatSetType(B,((PetscObject)Y)->type_name);CHKERRQ(ierr); 2139 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2140 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2141 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2142 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2143 ierr = MatHeaderMerge(Y,&B);CHKERRQ(ierr); 2144 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2145 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2146 } 2147 PetscFunctionReturn(0); 2148 } 2149 2150 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2151 2152 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2153 { 2154 #if defined(PETSC_USE_COMPLEX) 2155 PetscErrorCode ierr; 2156 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2157 2158 PetscFunctionBegin; 2159 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2160 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2161 #else 2162 PetscFunctionBegin; 2163 #endif 2164 PetscFunctionReturn(0); 2165 } 2166 2167 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2168 { 2169 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2170 PetscErrorCode ierr; 2171 2172 PetscFunctionBegin; 2173 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2174 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2175 
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*idxb = NULL,m = A->rmap->n;
  PetscScalar       *va,*vv;
  Vec               vB,vA;
  const PetscScalar *vb;

  PetscFunctionBegin;
  ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
  ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);

  ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<m; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
  ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);
  ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);

  ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
  ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      vv[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    } else {
      vv[i] = va[i];
      if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
        idx[i] = a->garray[idxb[i]];
    }
  }
  ierr = VecRestoreArrayWrite(v,&vv);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vA);CHKERRQ(ierr);
  ierr = VecDestroy(&vB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  PetscErrorCode    ierr;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds the entire A and the other processes have no entries */
  if (A->cmap->N == n) {
    ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
    ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr);
    ierr = VecDestroy(&diagV);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (n == 0) {
    if (m) {
      ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
      for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
      ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr);

  /* Get offdiagIdx[] for implicit 0.0 */
  ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
  ba = bav;
  bi = b->i;
  bj = b->j;
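  /* B keeps only the explicitly stored off-diagonal entries, so a row with fewer than
     cmap->N - n stored entries also contains implicit zeros.  For the minimum absolute
     value an implicit 0.0 can never be beaten by a stored entry, so the loop below first
     records the global column of the first such "hole" in the compressed column map and
     only then scans the stored entries of the row. */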
  ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so we already know the minimum absolute value is 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    for (j=0; j<ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  PetscErrorCode    ierr;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds the entire A and the other processes have no entries */
  if (A->cmap->N == n) {
    ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
    ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr);
    ierr = VecDestroy(&diagV);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (n == 0) {
    if (m) {
      ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
      for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
      ierr =
VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2372 } 2373 PetscFunctionReturn(0); 2374 } 2375 2376 ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2377 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2378 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2379 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2380 2381 /* Get offdiagIdx[] for implicit 0.0 */ 2382 ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr); 2383 ba = bav; 2384 bi = b->i; 2385 bj = b->j; 2386 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2387 for (r = 0; r < m; r++) { 2388 ncols = bi[r+1] - bi[r]; 2389 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2390 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2391 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2392 offdiagA[r] = 0.0; 2393 2394 /* Find first hole in the cmap */ 2395 for (j=0; j<ncols; j++) { 2396 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2397 if (col > j && j < cstart) { 2398 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2399 break; 2400 } else if (col > j + n && j >= cstart) { 2401 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2402 break; 2403 } 2404 } 2405 if (j == ncols && ncols < A->cmap->N - n) { 2406 /* a hole is outside compressed Bcols */ 2407 if (ncols == 0) { 2408 if (cstart) { 2409 offdiagIdx[r] = 0; 2410 } else offdiagIdx[r] = cend; 2411 } else { /* ncols > 0 */ 2412 offdiagIdx[r] = cmap[ncols-1] + 1; 2413 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2414 } 2415 } 2416 } 2417 2418 for (j=0; j<ncols; j++) { 2419 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2420 ba++; bj++; 2421 } 2422 } 2423 2424 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2425 ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2426 for (r = 0; r < m; ++r) { 2427 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2428 a[r] = diagA[r]; 2429 if (idx) idx[r] = cstart + diagIdx[r]; 2430 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2431 a[r] = diagA[r]; 2432 if (idx) { 2433 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2434 idx[r] = cstart + diagIdx[r]; 2435 } else idx[r] = offdiagIdx[r]; 2436 } 2437 } else { 2438 a[r] = offdiagA[r]; 2439 if (idx) idx[r] = offdiagIdx[r]; 2440 } 2441 } 2442 ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr); 2443 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2444 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2445 ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2446 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2447 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2448 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2449 PetscFunctionReturn(0); 2450 } 2451 2452 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2453 { 2454 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 2455 PetscInt m = A->rmap->n,n = A->cmap->n; 2456 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2457 PetscInt *cmap = mat->garray; 2458 PetscInt *diagIdx, *offdiagIdx; 2459 Vec diagV, offdiagV; 2460 PetscScalar *a, *diagA, *offdiagA; 2461 const PetscScalar *ba,*bav; 2462 PetscInt r,j,col,ncols,*bi,*bj; 2463 PetscErrorCode ierr; 2464 Mat B = mat->B; 2465 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2466 2467 PetscFunctionBegin; 2468 /* When a process holds entire A and other processes have no entry */ 2469 if (A->cmap->N == n) { 2470 ierr = 
VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2471 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2472 ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr); 2473 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2474 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2475 PetscFunctionReturn(0); 2476 } else if (n == 0) { 2477 if (m) { 2478 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2479 for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;} 2480 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2481 } 2482 PetscFunctionReturn(0); 2483 } 2484 2485 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2486 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2487 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2488 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2489 2490 /* Get offdiagIdx[] for implicit 0.0 */ 2491 ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr); 2492 ba = bav; 2493 bi = b->i; 2494 bj = b->j; 2495 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2496 for (r = 0; r < m; r++) { 2497 ncols = bi[r+1] - bi[r]; 2498 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2499 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2500 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2501 offdiagA[r] = 0.0; 2502 2503 /* Find first hole in the cmap */ 2504 for (j=0; j<ncols; j++) { 2505 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2506 if (col > j && j < cstart) { 2507 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2508 break; 2509 } else if (col > j + n && j >= cstart) { 2510 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2511 break; 2512 } 2513 } 2514 if (j == ncols && ncols < A->cmap->N - n) { 2515 /* a hole is outside compressed Bcols */ 2516 if (ncols == 0) { 2517 if (cstart) { 2518 offdiagIdx[r] = 0; 2519 } else offdiagIdx[r] = cend; 2520 } else { /* ncols > 0 */ 2521 offdiagIdx[r] = cmap[ncols-1] + 1; 2522 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2523 } 2524 } 2525 } 2526 2527 for (j=0; j<ncols; j++) { 2528 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2529 ba++; bj++; 2530 } 2531 } 2532 2533 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2534 ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr); 2535 for (r = 0; r < m; ++r) { 2536 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2537 a[r] = diagA[r]; 2538 if (idx) idx[r] = cstart + diagIdx[r]; 2539 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2540 a[r] = diagA[r]; 2541 if (idx) { 2542 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2543 idx[r] = cstart + diagIdx[r]; 2544 } else idx[r] = offdiagIdx[r]; 2545 } 2546 } else { 2547 a[r] = offdiagA[r]; 2548 if (idx) idx[r] = offdiagIdx[r]; 2549 } 2550 } 2551 ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr); 2552 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2553 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2554 ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr); 2555 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2556 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2557 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2558 PetscFunctionReturn(0); 2559 } 2560 2561 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2562 { 2563 PetscErrorCode ierr; 2564 Mat *dummy; 2565 2566 PetscFunctionBegin; 2567 ierr = 
MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2568 *newmat = *dummy; 2569 ierr = PetscFree(dummy);CHKERRQ(ierr); 2570 PetscFunctionReturn(0); 2571 } 2572 2573 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2574 { 2575 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2576 PetscErrorCode ierr; 2577 2578 PetscFunctionBegin; 2579 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2580 A->factorerrortype = a->A->factorerrortype; 2581 PetscFunctionReturn(0); 2582 } 2583 2584 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2585 { 2586 PetscErrorCode ierr; 2587 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2588 2589 PetscFunctionBegin; 2590 PetscCheckFalse(!x->assembled && !x->preallocated,PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2591 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2592 if (x->assembled) { 2593 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2594 } else { 2595 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2596 } 2597 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2598 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2599 PetscFunctionReturn(0); 2600 } 2601 2602 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2603 { 2604 PetscFunctionBegin; 2605 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2606 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2607 PetscFunctionReturn(0); 2608 } 2609 2610 /*@ 2611 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2612 2613 Collective on Mat 2614 2615 Input Parameters: 2616 + A - the matrix 2617 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2618 2619 Level: advanced 2620 2621 @*/ 2622 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2623 { 2624 PetscErrorCode ierr; 2625 2626 PetscFunctionBegin; 2627 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2628 PetscFunctionReturn(0); 2629 } 2630 2631 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2632 { 2633 PetscErrorCode ierr; 2634 PetscBool sc = PETSC_FALSE,flg; 2635 2636 PetscFunctionBegin; 2637 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2638 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2639 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2640 if (flg) { 2641 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2642 } 2643 ierr = PetscOptionsTail();CHKERRQ(ierr); 2644 PetscFunctionReturn(0); 2645 } 2646 2647 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2648 { 2649 PetscErrorCode ierr; 2650 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2651 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2652 2653 PetscFunctionBegin; 2654 if (!Y->preallocated) { 2655 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2656 } else if (!aij->nz) { 2657 PetscInt nonew = aij->nonew; 2658 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2659 aij->nonew = nonew; 2660 } 2661 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2662 
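  /* MatShift_Basic() adds a to each local diagonal entry with MatSetValues(); the
     preallocation adjustments above are meant to guarantee that an empty or
     unpreallocated matrix has a slot available for every local diagonal entry, so in
     that common case the insertions do not trigger a new-nonzero allocation error. */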
PetscFunctionReturn(0); 2663 } 2664 2665 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2666 { 2667 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2668 PetscErrorCode ierr; 2669 2670 PetscFunctionBegin; 2671 PetscCheckFalse(A->rmap->n != A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2672 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2673 if (d) { 2674 PetscInt rstart; 2675 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2676 *d += rstart; 2677 2678 } 2679 PetscFunctionReturn(0); 2680 } 2681 2682 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2683 { 2684 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2685 PetscErrorCode ierr; 2686 2687 PetscFunctionBegin; 2688 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2689 PetscFunctionReturn(0); 2690 } 2691 2692 /* -------------------------------------------------------------------*/ 2693 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2694 MatGetRow_MPIAIJ, 2695 MatRestoreRow_MPIAIJ, 2696 MatMult_MPIAIJ, 2697 /* 4*/ MatMultAdd_MPIAIJ, 2698 MatMultTranspose_MPIAIJ, 2699 MatMultTransposeAdd_MPIAIJ, 2700 NULL, 2701 NULL, 2702 NULL, 2703 /*10*/ NULL, 2704 NULL, 2705 NULL, 2706 MatSOR_MPIAIJ, 2707 MatTranspose_MPIAIJ, 2708 /*15*/ MatGetInfo_MPIAIJ, 2709 MatEqual_MPIAIJ, 2710 MatGetDiagonal_MPIAIJ, 2711 MatDiagonalScale_MPIAIJ, 2712 MatNorm_MPIAIJ, 2713 /*20*/ MatAssemblyBegin_MPIAIJ, 2714 MatAssemblyEnd_MPIAIJ, 2715 MatSetOption_MPIAIJ, 2716 MatZeroEntries_MPIAIJ, 2717 /*24*/ MatZeroRows_MPIAIJ, 2718 NULL, 2719 NULL, 2720 NULL, 2721 NULL, 2722 /*29*/ MatSetUp_MPIAIJ, 2723 NULL, 2724 NULL, 2725 MatGetDiagonalBlock_MPIAIJ, 2726 NULL, 2727 /*34*/ MatDuplicate_MPIAIJ, 2728 NULL, 2729 NULL, 2730 NULL, 2731 NULL, 2732 /*39*/ MatAXPY_MPIAIJ, 2733 MatCreateSubMatrices_MPIAIJ, 2734 MatIncreaseOverlap_MPIAIJ, 2735 MatGetValues_MPIAIJ, 2736 MatCopy_MPIAIJ, 2737 /*44*/ MatGetRowMax_MPIAIJ, 2738 MatScale_MPIAIJ, 2739 MatShift_MPIAIJ, 2740 MatDiagonalSet_MPIAIJ, 2741 MatZeroRowsColumns_MPIAIJ, 2742 /*49*/ MatSetRandom_MPIAIJ, 2743 NULL, 2744 NULL, 2745 NULL, 2746 NULL, 2747 /*54*/ MatFDColoringCreate_MPIXAIJ, 2748 NULL, 2749 MatSetUnfactored_MPIAIJ, 2750 MatPermute_MPIAIJ, 2751 NULL, 2752 /*59*/ MatCreateSubMatrix_MPIAIJ, 2753 MatDestroy_MPIAIJ, 2754 MatView_MPIAIJ, 2755 NULL, 2756 NULL, 2757 /*64*/ NULL, 2758 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2759 NULL, 2760 NULL, 2761 NULL, 2762 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2763 MatGetRowMinAbs_MPIAIJ, 2764 NULL, 2765 NULL, 2766 NULL, 2767 NULL, 2768 /*75*/ MatFDColoringApply_AIJ, 2769 MatSetFromOptions_MPIAIJ, 2770 NULL, 2771 NULL, 2772 MatFindZeroDiagonals_MPIAIJ, 2773 /*80*/ NULL, 2774 NULL, 2775 NULL, 2776 /*83*/ MatLoad_MPIAIJ, 2777 MatIsSymmetric_MPIAIJ, 2778 NULL, 2779 NULL, 2780 NULL, 2781 NULL, 2782 /*89*/ NULL, 2783 NULL, 2784 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2785 NULL, 2786 NULL, 2787 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2788 NULL, 2789 NULL, 2790 NULL, 2791 MatBindToCPU_MPIAIJ, 2792 /*99*/ MatProductSetFromOptions_MPIAIJ, 2793 NULL, 2794 NULL, 2795 MatConjugate_MPIAIJ, 2796 NULL, 2797 /*104*/MatSetValuesRow_MPIAIJ, 2798 MatRealPart_MPIAIJ, 2799 MatImaginaryPart_MPIAIJ, 2800 NULL, 2801 NULL, 2802 /*109*/NULL, 2803 NULL, 2804 MatGetRowMin_MPIAIJ, 2805 NULL, 2806 MatMissingDiagonal_MPIAIJ, 2807 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2808 NULL, 2809 MatGetGhosts_MPIAIJ, 2810 NULL, 2811 NULL, 2812 /*119*/MatMultDiagonalBlock_MPIAIJ, 
2813 NULL, 2814 NULL, 2815 NULL, 2816 MatGetMultiProcBlock_MPIAIJ, 2817 /*124*/MatFindNonzeroRows_MPIAIJ, 2818 MatGetColumnReductions_MPIAIJ, 2819 MatInvertBlockDiagonal_MPIAIJ, 2820 MatInvertVariableBlockDiagonal_MPIAIJ, 2821 MatCreateSubMatricesMPI_MPIAIJ, 2822 /*129*/NULL, 2823 NULL, 2824 NULL, 2825 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2826 NULL, 2827 /*134*/NULL, 2828 NULL, 2829 NULL, 2830 NULL, 2831 NULL, 2832 /*139*/MatSetBlockSizes_MPIAIJ, 2833 NULL, 2834 NULL, 2835 MatFDColoringSetUp_MPIXAIJ, 2836 MatFindOffBlockDiagonalEntries_MPIAIJ, 2837 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2838 /*145*/NULL, 2839 NULL, 2840 NULL 2841 }; 2842 2843 /* ----------------------------------------------------------------------------------------*/ 2844 2845 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2846 { 2847 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2848 PetscErrorCode ierr; 2849 2850 PetscFunctionBegin; 2851 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2852 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2853 PetscFunctionReturn(0); 2854 } 2855 2856 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2857 { 2858 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2859 PetscErrorCode ierr; 2860 2861 PetscFunctionBegin; 2862 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2863 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2864 PetscFunctionReturn(0); 2865 } 2866 2867 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2868 { 2869 Mat_MPIAIJ *b; 2870 PetscErrorCode ierr; 2871 PetscMPIInt size; 2872 2873 PetscFunctionBegin; 2874 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2875 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2876 b = (Mat_MPIAIJ*)B->data; 2877 2878 #if defined(PETSC_USE_CTABLE) 2879 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2880 #else 2881 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2882 #endif 2883 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2884 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2885 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2886 2887 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2888 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 2889 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2890 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2891 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 2892 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2893 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2894 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2895 2896 if (!B->preallocated) { 2897 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2898 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2899 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2900 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2901 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2902 } 2903 2904 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2905 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2906 B->preallocated = PETSC_TRUE; 2907 B->was_assembled = PETSC_FALSE; 2908 B->assembled = PETSC_FALSE; 2909 PetscFunctionReturn(0); 2910 } 2911 2912 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2913 { 2914 Mat_MPIAIJ *b; 2915 PetscErrorCode ierr; 2916 2917 PetscFunctionBegin; 2918 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2919 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2920 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2921 b = (Mat_MPIAIJ*)B->data; 2922 2923 #if defined(PETSC_USE_CTABLE) 2924 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2925 #else 2926 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2927 #endif 2928 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2929 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2930 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2931 2932 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2933 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2934 B->preallocated = PETSC_TRUE; 2935 B->was_assembled = PETSC_FALSE; 2936 B->assembled = PETSC_FALSE; 2937 PetscFunctionReturn(0); 2938 } 2939 2940 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2941 { 2942 Mat mat; 2943 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2944 PetscErrorCode ierr; 2945 2946 PetscFunctionBegin; 2947 *newmat = NULL; 2948 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2949 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2950 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2951 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2952 a = (Mat_MPIAIJ*)mat->data; 2953 2954 mat->factortype = matin->factortype; 2955 mat->assembled = matin->assembled; 2956 mat->insertmode = NOT_SET_VALUES; 2957 mat->preallocated = matin->preallocated; 2958 2959 a->size = oldmat->size; 2960 a->rank = oldmat->rank; 2961 a->donotstash = oldmat->donotstash; 2962 a->roworiented = oldmat->roworiented; 2963 a->rowindices = NULL; 2964 a->rowvalues = NULL; 2965 a->getrowactive = PETSC_FALSE; 2966 2967 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2968 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2969 2970 if (oldmat->colmap) { 2971 #if defined(PETSC_USE_CTABLE) 2972 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2973 #else 2974 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2975 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2976 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 2977 #endif 2978 } else a->colmap = NULL; 2979 if (oldmat->garray) { 2980 PetscInt len; 2981 len = oldmat->B->cmap->n; 2982 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2983 
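  /* garray[] translates the compressed local column numbering of the off-diagonal block B
     back to global column indices (purely as an illustration, garray = {3,17,42} would mean
     that local column 1 of B holds global column 17).  It is copied verbatim so that the
     duplicate's B block uses the same column compression as the original. */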
ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2984 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 2985 } else a->garray = NULL; 2986 2987 /* It may happen MatDuplicate is called with a non-assembled matrix 2988 In fact, MatDuplicate only requires the matrix to be preallocated 2989 This may happen inside a DMCreateMatrix_Shell */ 2990 if (oldmat->lvec) { 2991 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2992 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2993 } 2994 if (oldmat->Mvctx) { 2995 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2996 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2997 } 2998 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2999 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3000 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3001 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3002 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3003 *newmat = mat; 3004 PetscFunctionReturn(0); 3005 } 3006 3007 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3008 { 3009 PetscBool isbinary, ishdf5; 3010 PetscErrorCode ierr; 3011 3012 PetscFunctionBegin; 3013 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 3014 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 3015 /* force binary viewer to load .info file if it has not yet done so */ 3016 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3017 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 3018 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 3019 if (isbinary) { 3020 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 3021 } else if (ishdf5) { 3022 #if defined(PETSC_HAVE_HDF5) 3023 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 3024 #else 3025 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3026 #endif 3027 } else { 3028 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 3029 } 3030 PetscFunctionReturn(0); 3031 } 3032 3033 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3034 { 3035 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 3036 PetscInt *rowidxs,*colidxs; 3037 PetscScalar *matvals; 3038 PetscErrorCode ierr; 3039 3040 PetscFunctionBegin; 3041 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3042 3043 /* read in matrix header */ 3044 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3045 PetscCheckFalse(header[0] != MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3046 M = header[1]; N = header[2]; nz = header[3]; 3047 PetscCheckFalse(M < 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M); 3048 PetscCheckFalse(N < 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N); 3049 PetscCheckFalse(nz < 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 3050 3051 /* set block sizes from 
the viewer's .info file */ 3052 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3053 /* set global sizes if not set already */ 3054 if (mat->rmap->N < 0) mat->rmap->N = M; 3055 if (mat->cmap->N < 0) mat->cmap->N = N; 3056 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3057 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3058 3059 /* check if the matrix sizes are correct */ 3060 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3061 PetscCheckFalse(M != rows || N != cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols); 3062 3063 /* read in row lengths and build row indices */ 3064 ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr); 3065 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3066 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 3067 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3068 ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr); 3069 PetscCheckFalse(sum != nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum); 3070 /* read in column indices and matrix values */ 3071 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 3072 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 3073 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 3074 /* store matrix indices and values */ 3075 ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr); 3076 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3077 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3078 PetscFunctionReturn(0); 3079 } 3080 3081 /* Not scalable because of ISAllGather() unless getting all columns. 
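   Each process ends up with its own copy of the gathered column index set, so the memory use
   per process grows with the global number of selected columns.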
*/ 3082 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3083 { 3084 PetscErrorCode ierr; 3085 IS iscol_local; 3086 PetscBool isstride; 3087 PetscMPIInt lisstride=0,gisstride; 3088 3089 PetscFunctionBegin; 3090 /* check if we are grabbing all columns*/ 3091 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3092 3093 if (isstride) { 3094 PetscInt start,len,mstart,mlen; 3095 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3096 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3097 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3098 if (mstart == start && mlen-mstart == len) lisstride = 1; 3099 } 3100 3101 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 3102 if (gisstride) { 3103 PetscInt N; 3104 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3105 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr); 3106 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3107 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3108 } else { 3109 PetscInt cbs; 3110 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3111 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3112 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3113 } 3114 3115 *isseq = iscol_local; 3116 PetscFunctionReturn(0); 3117 } 3118 3119 /* 3120 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3121 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3122 3123 Input Parameters: 3124 mat - matrix 3125 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3126 i.e., mat->rstart <= isrow[i] < mat->rend 3127 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3128 i.e., mat->cstart <= iscol[i] < mat->cend 3129 Output Parameter: 3130 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3131 iscol_o - sequential column index set for retrieving mat->B 3132 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3133 */ 3134 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3135 { 3136 PetscErrorCode ierr; 3137 Vec x,cmap; 3138 const PetscInt *is_idx; 3139 PetscScalar *xarray,*cmaparray; 3140 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3141 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3142 Mat B=a->B; 3143 Vec lvec=a->lvec,lcmap; 3144 PetscInt i,cstart,cend,Bn=B->cmap->N; 3145 MPI_Comm comm; 3146 VecScatter Mvctx=a->Mvctx; 3147 3148 PetscFunctionBegin; 3149 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3150 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3151 3152 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3153 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3154 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3155 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3156 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3157 3158 /* Get start indices */ 3159 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3160 isstart -= ncols; 3161 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3162 3163 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3164 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3165 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3166 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3167 for (i=0; i<ncols; i++) { 3168 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3169 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3170 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3171 } 3172 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3173 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3174 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3175 3176 /* Get iscol_d */ 3177 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3178 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3179 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3180 3181 /* Get isrow_d */ 3182 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3183 rstart = mat->rmap->rstart; 3184 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3185 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3186 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3187 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3188 3189 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3190 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3191 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3192 3193 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3194 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3195 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3196 3197 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3198 3199 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3200 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3201 3202 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3203 /* off-process column indices */ 3204 count = 0; 3205 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3206 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3207 3208 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3209 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3210 for (i=0; i<Bn; i++) { 3211 if (PetscRealPart(xarray[i]) > -1.0) { 3212 idx[count] = i; /* local column index in off-diagonal part B */ 3213 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3214 count++; 3215 } 3216 } 3217 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3218 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3219 3220 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3221 /* cannot ensure iscol_o has same blocksize as iscol! 
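      (the off-process columns selected on this process form an arbitrary subset of iscol, so they
      need not come in complete blocks even when iscol has a block size larger than one)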
*/ 3222 3223 ierr = PetscFree(idx);CHKERRQ(ierr); 3224 *garray = cmap1; 3225 3226 ierr = VecDestroy(&x);CHKERRQ(ierr); 3227 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3228 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3229 PetscFunctionReturn(0); 3230 } 3231 3232 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3233 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3234 { 3235 PetscErrorCode ierr; 3236 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3237 Mat M = NULL; 3238 MPI_Comm comm; 3239 IS iscol_d,isrow_d,iscol_o; 3240 Mat Asub = NULL,Bsub = NULL; 3241 PetscInt n; 3242 3243 PetscFunctionBegin; 3244 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3245 3246 if (call == MAT_REUSE_MATRIX) { 3247 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3248 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3249 PetscCheckFalse(!isrow_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3250 3251 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3252 PetscCheckFalse(!iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3253 3254 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3255 PetscCheckFalse(!iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3256 3257 /* Update diagonal and off-diagonal portions of submat */ 3258 asub = (Mat_MPIAIJ*)(*submat)->data; 3259 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3260 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3261 if (n) { 3262 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3263 } 3264 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3265 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3266 3267 } else { /* call == MAT_INITIAL_MATRIX) */ 3268 const PetscInt *garray; 3269 PetscInt BsubN; 3270 3271 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3272 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3273 3274 /* Create local submatrices Asub and Bsub */ 3275 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3276 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3277 3278 /* Create submatrix M */ 3279 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3280 3281 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3282 asub = (Mat_MPIAIJ*)M->data; 3283 3284 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3285 n = asub->B->cmap->N; 3286 if (BsubN > n) { 3287 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3288 const PetscInt *idx; 3289 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3290 ierr = PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3291 3292 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3293 j = 0; 3294 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3295 for (i=0; i<n; i++) { 3296 if (j >= BsubN) break; 3297 while (subgarray[i] > garray[j]) j++; 3298 3299 if (subgarray[i] == garray[j]) { 3300 idx_new[i] = idx[j++]; 3301 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]); 3302 } 3303 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3304 3305 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3306 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3307 3308 } else if (BsubN < n) { 3309 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N); 3310 } 3311 3312 ierr = PetscFree(garray);CHKERRQ(ierr); 3313 *submat = M; 3314 3315 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3316 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3317 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3318 3319 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3320 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3321 3322 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3323 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3324 } 3325 PetscFunctionReturn(0); 3326 } 3327 3328 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3329 { 3330 PetscErrorCode ierr; 3331 IS iscol_local=NULL,isrow_d; 3332 PetscInt csize; 3333 PetscInt n,i,j,start,end; 3334 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3335 MPI_Comm comm; 3336 3337 PetscFunctionBegin; 3338 /* If isrow has same processor distribution as mat, 3339 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3340 if (call == MAT_REUSE_MATRIX) { 3341 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3342 if (isrow_d) { 3343 sameRowDist = PETSC_TRUE; 3344 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3345 } else { 3346 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3347 if (iscol_local) { 3348 sameRowDist = PETSC_TRUE; 3349 tsameDist[1] = PETSC_FALSE; /* 
!sameColDist */ 3350 } 3351 } 3352 } else { 3353 /* Check if isrow has same processor distribution as mat */ 3354 sameDist[0] = PETSC_FALSE; 3355 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3356 if (!n) { 3357 sameDist[0] = PETSC_TRUE; 3358 } else { 3359 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3360 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3361 if (i >= start && j < end) { 3362 sameDist[0] = PETSC_TRUE; 3363 } 3364 } 3365 3366 /* Check if iscol has same processor distribution as mat */ 3367 sameDist[1] = PETSC_FALSE; 3368 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3369 if (!n) { 3370 sameDist[1] = PETSC_TRUE; 3371 } else { 3372 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3373 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3374 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3375 } 3376 3377 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3378 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRMPI(ierr); 3379 sameRowDist = tsameDist[0]; 3380 } 3381 3382 if (sameRowDist) { 3383 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3384 /* isrow and iscol have same processor distribution as mat */ 3385 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3386 PetscFunctionReturn(0); 3387 } else { /* sameRowDist */ 3388 /* isrow has same processor distribution as mat */ 3389 if (call == MAT_INITIAL_MATRIX) { 3390 PetscBool sorted; 3391 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3392 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3393 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3394 PetscCheckFalse(n != i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i); 3395 3396 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3397 if (sorted) { 3398 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3399 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3400 PetscFunctionReturn(0); 3401 } 3402 } else { /* call == MAT_REUSE_MATRIX */ 3403 IS iscol_sub; 3404 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3405 if (iscol_sub) { 3406 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3407 PetscFunctionReturn(0); 3408 } 3409 } 3410 } 3411 } 3412 3413 /* General case: iscol -> iscol_local which has global size of iscol */ 3414 if (call == MAT_REUSE_MATRIX) { 3415 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3416 PetscCheckFalse(!iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3417 } else { 3418 if (!iscol_local) { 3419 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3420 } 3421 } 3422 3423 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3424 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3425 3426 if (call == MAT_INITIAL_MATRIX) { 3427 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3428 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3429 } 3430 PetscFunctionReturn(0); 3431 } 3432 3433 /*@C 3434 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 
3435 and "off-diagonal" part of the matrix in CSR format. 3436 3437 Collective 3438 3439 Input Parameters: 3440 + comm - MPI communicator 3441 . A - "diagonal" portion of matrix 3442 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3443 - garray - global index of B columns 3444 3445 Output Parameter: 3446 . mat - the matrix, with input A as its local diagonal matrix 3447 Level: advanced 3448 3449 Notes: 3450 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3451 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3452 3453 .seealso: MatCreateMPIAIJWithSplitArrays() 3454 @*/ 3455 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3456 { 3457 PetscErrorCode ierr; 3458 Mat_MPIAIJ *maij; 3459 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3460 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3461 const PetscScalar *oa; 3462 Mat Bnew; 3463 PetscInt m,n,N; 3464 3465 PetscFunctionBegin; 3466 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3467 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3468 PetscCheckFalse(m != B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N); 3469 PetscCheckFalse(A->rmap->bs != B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs); 3470 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3471 /* PetscCheckFalse(A->cmap->bs != B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */ 3472 3473 /* Get global columns of mat */ 3474 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3475 3476 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3477 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3478 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3479 maij = (Mat_MPIAIJ*)(*mat)->data; 3480 3481 (*mat)->preallocated = PETSC_TRUE; 3482 3483 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3484 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3485 3486 /* Set A as diagonal portion of *mat */ 3487 maij->A = A; 3488 3489 nz = oi[m]; 3490 for (i=0; i<nz; i++) { 3491 col = oj[i]; 3492 oj[i] = garray[col]; 3493 } 3494 3495 /* Set Bnew as off-diagonal portion of *mat */ 3496 ierr = MatSeqAIJGetArrayRead(B,&oa);CHKERRQ(ierr); 3497 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew);CHKERRQ(ierr); 3498 ierr = MatSeqAIJRestoreArrayRead(B,&oa);CHKERRQ(ierr); 3499 bnew = (Mat_SeqAIJ*)Bnew->data; 3500 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3501 maij->B = Bnew; 3502 3503 PetscCheckFalse(B->rmap->N != Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N); 3504 3505 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3506 b->free_a = PETSC_FALSE; 3507 b->free_ij = PETSC_FALSE; 3508 ierr = MatDestroy(&B);CHKERRQ(ierr); 3509 3510 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3511 bnew->free_a = PETSC_TRUE; 3512 bnew->free_ij = PETSC_TRUE; 3513 3514 /* condense columns of maij->B */ 3515 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3516 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3517 
ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3518 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3519 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3520 PetscFunctionReturn(0); 3521 } 3522 3523 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3524 3525 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3526 { 3527 PetscErrorCode ierr; 3528 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3529 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3530 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3531 Mat M,Msub,B=a->B; 3532 MatScalar *aa; 3533 Mat_SeqAIJ *aij; 3534 PetscInt *garray = a->garray,*colsub,Ncols; 3535 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3536 IS iscol_sub,iscmap; 3537 const PetscInt *is_idx,*cmap; 3538 PetscBool allcolumns=PETSC_FALSE; 3539 MPI_Comm comm; 3540 3541 PetscFunctionBegin; 3542 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3543 if (call == MAT_REUSE_MATRIX) { 3544 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3545 PetscCheckFalse(!iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3546 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3547 3548 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3549 PetscCheckFalse(!iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3550 3551 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3552 PetscCheckFalse(!Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3553 3554 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3555 3556 } else { /* call == MAT_INITIAL_MATRIX) */ 3557 PetscBool flg; 3558 3559 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3560 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3561 3562 /* (1) iscol -> nonscalable iscol_local */ 3563 /* Check for special case: each processor gets entire matrix columns */ 3564 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3565 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3566 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 3567 if (allcolumns) { 3568 iscol_sub = iscol_local; 3569 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3570 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3571 3572 } else { 3573 /* (2) iscol_local -> iscol_sub and iscmap. 
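       iscol_sub keeps the entries of iscol_local that this process either owns (diagonal block) or
       couples to through its off-diagonal block; iscmap records their positions within iscol_local,
       i.e. the column numbering of the submatrix.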
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3574 PetscInt *idx,*cmap1,k; 3575 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3576 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3577 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3578 count = 0; 3579 k = 0; 3580 for (i=0; i<Ncols; i++) { 3581 j = is_idx[i]; 3582 if (j >= cstart && j < cend) { 3583 /* diagonal part of mat */ 3584 idx[count] = j; 3585 cmap1[count++] = i; /* column index in submat */ 3586 } else if (Bn) { 3587 /* off-diagonal part of mat */ 3588 if (j == garray[k]) { 3589 idx[count] = j; 3590 cmap1[count++] = i; /* column index in submat */ 3591 } else if (j > garray[k]) { 3592 while (j > garray[k] && k < Bn-1) k++; 3593 if (j == garray[k]) { 3594 idx[count] = j; 3595 cmap1[count++] = i; /* column index in submat */ 3596 } 3597 } 3598 } 3599 } 3600 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3601 3602 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3603 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3604 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3605 3606 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3607 } 3608 3609 /* (3) Create sequential Msub */ 3610 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3611 } 3612 3613 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3614 aij = (Mat_SeqAIJ*)(Msub)->data; 3615 ii = aij->i; 3616 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3617 3618 /* 3619 m - number of local rows 3620 Ncols - number of columns (same on all processors) 3621 rstart - first row in new global matrix generated 3622 */ 3623 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3624 3625 if (call == MAT_INITIAL_MATRIX) { 3626 /* (4) Create parallel newmat */ 3627 PetscMPIInt rank,size; 3628 PetscInt csize; 3629 3630 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 3631 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 3632 3633 /* 3634 Determine the number of non-zeros in the diagonal and off-diagonal 3635 portions of the matrix in order to do correct preallocation 3636 */ 3637 3638 /* first get start and end of "diagonal" columns */ 3639 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3640 if (csize == PETSC_DECIDE) { 3641 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3642 if (mglobal == Ncols) { /* square matrix */ 3643 nlocal = m; 3644 } else { 3645 nlocal = Ncols/size + ((Ncols % size) > rank); 3646 } 3647 } else { 3648 nlocal = csize; 3649 } 3650 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3651 rstart = rend - nlocal; 3652 PetscCheckFalse(rank == size - 1 && rend != Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols); 3653 3654 /* next, compute all the lengths */ 3655 jj = aij->j; 3656 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3657 olens = dlens + m; 3658 for (i=0; i<m; i++) { 3659 jend = ii[i+1] - ii[i]; 3660 olen = 0; 3661 dlen = 0; 3662 for (j=0; j<jend; j++) { 3663 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3664 else dlen++; 3665 jj++; 3666 } 3667 olens[i] = olen; 3668 dlens[i] = dlen; 3669 } 3670 3671 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3672 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3673 3674 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3675 ierr = 
MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 3676 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3677 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3678 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3679 ierr = PetscFree(dlens);CHKERRQ(ierr); 3680 3681 } else { /* call == MAT_REUSE_MATRIX */ 3682 M = *newmat; 3683 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3684 PetscCheckFalse(i != m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3685 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3686 /* 3687 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3688 rather than the slower MatSetValues(). 3689 */ 3690 M->was_assembled = PETSC_TRUE; 3691 M->assembled = PETSC_FALSE; 3692 } 3693 3694 /* (5) Set values of Msub to *newmat */ 3695 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3696 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3697 3698 jj = aij->j; 3699 ierr = MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr); 3700 for (i=0; i<m; i++) { 3701 row = rstart + i; 3702 nz = ii[i+1] - ii[i]; 3703 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3704 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3705 jj += nz; aa += nz; 3706 } 3707 ierr = MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr); 3708 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3709 3710 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3711 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3712 3713 ierr = PetscFree(colsub);CHKERRQ(ierr); 3714 3715 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3716 if (call == MAT_INITIAL_MATRIX) { 3717 *newmat = M; 3718 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3719 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3720 3721 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3722 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3723 3724 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3725 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3726 3727 if (iscol_local) { 3728 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3729 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3730 } 3731 } 3732 PetscFunctionReturn(0); 3733 } 3734 3735 /* 3736 Not great since it makes two copies of the submatrix, first an SeqAIJ 3737 in local and then by concatenating the local matrices the end result. 3738 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3739 3740 Note: This requires a sequential iscol with all indices. 
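   (every process must pass the same sequential index set containing all selected global columns,
   e.g. the result of ISGetSeqIS_Private()/ISAllGather() on the parallel iscol, which is how
   MatCreateSubMatrix_MPIAIJ() calls this routine)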
3741 */ 3742 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3743 { 3744 PetscErrorCode ierr; 3745 PetscMPIInt rank,size; 3746 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3747 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3748 Mat M,Mreuse; 3749 MatScalar *aa,*vwork; 3750 MPI_Comm comm; 3751 Mat_SeqAIJ *aij; 3752 PetscBool colflag,allcolumns=PETSC_FALSE; 3753 3754 PetscFunctionBegin; 3755 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3756 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 3757 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 3758 3759 /* Check for special case: each processor gets entire matrix columns */ 3760 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3761 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3762 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3763 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 3764 3765 if (call == MAT_REUSE_MATRIX) { 3766 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3767 PetscCheckFalse(!Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3768 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3769 } else { 3770 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3771 } 3772 3773 /* 3774 m - number of local rows 3775 n - number of columns (same on all processors) 3776 rstart - first row in new global matrix generated 3777 */ 3778 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3779 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3780 if (call == MAT_INITIAL_MATRIX) { 3781 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3782 ii = aij->i; 3783 jj = aij->j; 3784 3785 /* 3786 Determine the number of non-zeros in the diagonal and off-diagonal 3787 portions of the matrix in order to do correct preallocation 3788 */ 3789 3790 /* first get start and end of "diagonal" columns */ 3791 if (csize == PETSC_DECIDE) { 3792 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3793 if (mglobal == n) { /* square matrix */ 3794 nlocal = m; 3795 } else { 3796 nlocal = n/size + ((n % size) > rank); 3797 } 3798 } else { 3799 nlocal = csize; 3800 } 3801 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3802 rstart = rend - nlocal; 3803 PetscCheckFalse(rank == size - 1 && rend != n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n); 3804 3805 /* next, compute all the lengths */ 3806 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3807 olens = dlens + m; 3808 for (i=0; i<m; i++) { 3809 jend = ii[i+1] - ii[i]; 3810 olen = 0; 3811 dlen = 0; 3812 for (j=0; j<jend; j++) { 3813 if (*jj < rstart || *jj >= rend) olen++; 3814 else dlen++; 3815 jj++; 3816 } 3817 olens[i] = olen; 3818 dlens[i] = dlen; 3819 } 3820 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3821 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3822 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3823 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3824 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3825 ierr = PetscFree(dlens);CHKERRQ(ierr); 3826 } else { 3827 PetscInt ml,nl; 3828 3829 M = *newmat; 3830 ierr = 
MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3831 PetscCheckFalse(ml != m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3832 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3833 /* 3834 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3835 rather than the slower MatSetValues(). 3836 */ 3837 M->was_assembled = PETSC_TRUE; 3838 M->assembled = PETSC_FALSE; 3839 } 3840 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3841 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3842 ii = aij->i; 3843 jj = aij->j; 3844 3845 /* trigger copy to CPU if needed */ 3846 ierr = MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr); 3847 for (i=0; i<m; i++) { 3848 row = rstart + i; 3849 nz = ii[i+1] - ii[i]; 3850 cwork = jj; jj += nz; 3851 vwork = aa; aa += nz; 3852 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3853 } 3854 ierr = MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr); 3855 3856 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3857 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3858 *newmat = M; 3859 3860 /* save submatrix used in processor for next request */ 3861 if (call == MAT_INITIAL_MATRIX) { 3862 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3863 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3864 } 3865 PetscFunctionReturn(0); 3866 } 3867 3868 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3869 { 3870 PetscInt m,cstart, cend,j,nnz,i,d; 3871 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3872 const PetscInt *JJ; 3873 PetscErrorCode ierr; 3874 PetscBool nooffprocentries; 3875 3876 PetscFunctionBegin; 3877 PetscCheckFalse(Ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %" PetscInt_FMT,Ii[0]); 3878 3879 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3880 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3881 m = B->rmap->n; 3882 cstart = B->cmap->rstart; 3883 cend = B->cmap->rend; 3884 rstart = B->rmap->rstart; 3885 3886 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3887 3888 if (PetscDefined(USE_DEBUG)) { 3889 for (i=0; i<m; i++) { 3890 nnz = Ii[i+1]- Ii[i]; 3891 JJ = J + Ii[i]; 3892 PetscCheckFalse(nnz < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns",i,nnz); 3893 PetscCheckFalse(nnz && (JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]); 3894 PetscCheckFalse(nnz && (JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N); 3895 } 3896 } 3897 3898 for (i=0; i<m; i++) { 3899 nnz = Ii[i+1]- Ii[i]; 3900 JJ = J + Ii[i]; 3901 nnz_max = PetscMax(nnz_max,nnz); 3902 d = 0; 3903 for (j=0; j<nnz; j++) { 3904 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3905 } 3906 d_nnz[i] = d; 3907 o_nnz[i] = nnz - d; 3908 } 3909 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3910 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3911 3912 for (i=0; i<m; i++) { 3913 ii = i + rstart; 3914 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? 
v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 3915 } 3916 nooffprocentries = B->nooffprocentries; 3917 B->nooffprocentries = PETSC_TRUE; 3918 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3919 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3920 B->nooffprocentries = nooffprocentries; 3921 3922 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3923 PetscFunctionReturn(0); 3924 } 3925 3926 /*@ 3927 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3928 (the default parallel PETSc format). 3929 3930 Collective 3931 3932 Input Parameters: 3933 + B - the matrix 3934 . i - the indices into j for the start of each local row (starts with zero) 3935 . j - the column indices for each local row (starts with zero) 3936 - v - optional values in the matrix 3937 3938 Level: developer 3939 3940 Notes: 3941 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3942 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3943 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3944 3945 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3946 3947 The format which is used for the sparse matrix input, is equivalent to a 3948 row-major ordering.. i.e for the following matrix, the input data expected is 3949 as shown 3950 3951 $ 1 0 0 3952 $ 2 0 3 P0 3953 $ ------- 3954 $ 4 5 6 P1 3955 $ 3956 $ Process0 [P0]: rows_owned=[0,1] 3957 $ i = {0,1,3} [size = nrow+1 = 2+1] 3958 $ j = {0,0,2} [size = 3] 3959 $ v = {1,2,3} [size = 3] 3960 $ 3961 $ Process1 [P1]: rows_owned=[2] 3962 $ i = {0,3} [size = nrow+1 = 1+1] 3963 $ j = {0,1,2} [size = 3] 3964 $ v = {4,5,6} [size = 3] 3965 3966 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3967 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3968 @*/ 3969 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3970 { 3971 PetscErrorCode ierr; 3972 3973 PetscFunctionBegin; 3974 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3975 PetscFunctionReturn(0); 3976 } 3977 3978 /*@C 3979 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3980 (the default parallel PETSc format). For good matrix assembly performance 3981 the user should preallocate the matrix storage by setting the parameters 3982 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3983 performance can be increased by more than a factor of 50. 3984 3985 Collective 3986 3987 Input Parameters: 3988 + B - the matrix 3989 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3990 (same value is used for all local rows) 3991 . d_nnz - array containing the number of nonzeros in the various rows of the 3992 DIAGONAL portion of the local submatrix (possibly different for each row) 3993 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3994 The size of this array is equal to the number of local rows, i.e 'm'. 3995 For matrices that will be factored, you must leave room for (and set) 3996 the diagonal entry even if it is zero. 3997 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3998 submatrix (same value is used for all local rows). 3999 - o_nnz - array containing the number of nonzeros in the various rows of the 4000 OFF-DIAGONAL portion of the local submatrix (possibly different for 4001 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 4002 structure. The size of this array is equal to the number 4003 of local rows, i.e 'm'. 4004 4005 If the *_nnz parameter is given then the *_nz parameter is ignored 4006 4007 The AIJ format (also called the Yale sparse matrix format or 4008 compressed row storage (CSR)), is fully compatible with standard Fortran 77 4009 storage. The stored row and column indices begin with zero. 4010 See Users-Manual: ch_mat for details. 4011 4012 The parallel matrix is partitioned such that the first m0 rows belong to 4013 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4014 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 4015 4016 The DIAGONAL portion of the local submatrix of a processor can be defined 4017 as the submatrix which is obtained by extraction the part corresponding to 4018 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4019 first row that belongs to the processor, r2 is the last row belonging to 4020 the this processor, and c1-c2 is range of indices of the local part of a 4021 vector suitable for applying the matrix to. This is an mxn matrix. In the 4022 common case of a square matrix, the row and column ranges are the same and 4023 the DIAGONAL part is also square. The remaining portion of the local 4024 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4025 4026 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4027 4028 You can call MatGetInfo() to get information on how effective the preallocation was; 4029 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4030 You can also run with the option -info and look for messages with the string 4031 malloc in them to see if additional memory allocation was needed. 4032 4033 Example usage: 4034 4035 Consider the following 8x8 matrix with 34 non-zero values, that is 4036 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4037 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4038 as follows: 4039 4040 .vb 4041 1 2 0 | 0 3 0 | 0 4 4042 Proc0 0 5 6 | 7 0 0 | 8 0 4043 9 0 10 | 11 0 0 | 12 0 4044 ------------------------------------- 4045 13 0 14 | 15 16 17 | 0 0 4046 Proc1 0 18 0 | 19 20 21 | 0 0 4047 0 0 0 | 22 23 0 | 24 0 4048 ------------------------------------- 4049 Proc2 25 26 27 | 0 0 28 | 29 0 4050 30 0 0 | 31 32 33 | 0 34 4051 .ve 4052 4053 This can be represented as a collection of submatrices as: 4054 4055 .vb 4056 A B C 4057 D E F 4058 G H I 4059 .ve 4060 4061 Where the submatrices A,B,C are owned by proc0, D,E,F are 4062 owned by proc1, G,H,I are owned by proc2. 4063 4064 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4065 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4066 The 'M','N' parameters are 8,8, and have the same values on all procs. 4067 4068 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4069 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4070 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4071 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4072 part as SeqAIJ matrices. 
For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
   in standard CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and the i indices are indices corresponding to the local j array.

   The format used for the sparse matrix input is equivalent to a
   row-major ordering,
i.e for the following matrix, the input data expected is 4150 as shown 4151 4152 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays 4153 4154 $ 1 0 0 4155 $ 2 0 3 P0 4156 $ ------- 4157 $ 4 5 6 P1 4158 $ 4159 $ Process0 [P0]: rows_owned=[0,1] 4160 $ i = {0,1,3} [size = nrow+1 = 2+1] 4161 $ j = {0,0,2} [size = 3] 4162 $ v = {1,2,3} [size = 3] 4163 $ 4164 $ Process1 [P1]: rows_owned=[2] 4165 $ i = {0,3} [size = nrow+1 = 1+1] 4166 $ j = {0,1,2} [size = 3] 4167 $ v = {4,5,6} [size = 3] 4168 4169 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4170 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4171 @*/ 4172 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4173 { 4174 PetscErrorCode ierr; 4175 4176 PetscFunctionBegin; 4177 PetscCheckFalse(i && i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4178 PetscCheckFalse(m < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4179 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4180 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4181 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4182 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4183 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4184 PetscFunctionReturn(0); 4185 } 4186 4187 /*@ 4188 MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard 4189 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical 4190 4191 Collective 4192 4193 Input Parameters: 4194 + mat - the matrix 4195 . m - number of local rows (Cannot be PETSC_DECIDE) 4196 . n - This value should be the same as the local size used in creating the 4197 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4198 calculated if N is given) For square matrices n is almost always m. 4199 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4200 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4201 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4202 . 
J - column indices 4203 - v - matrix values 4204 4205 Level: intermediate 4206 4207 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4208 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4209 @*/ 4210 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4211 { 4212 PetscErrorCode ierr; 4213 PetscInt cstart,nnz,i,j; 4214 PetscInt *ld; 4215 PetscBool nooffprocentries; 4216 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4217 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data; 4218 PetscScalar *ad,*ao; 4219 const PetscInt *Adi = Ad->i; 4220 PetscInt ldi,Iii,md; 4221 4222 PetscFunctionBegin; 4223 PetscCheckFalse(Ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4224 PetscCheckFalse(m < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4225 PetscCheckFalse(m != mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4226 PetscCheckFalse(n != mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4227 4228 ierr = MatSeqAIJGetArrayWrite(Aij->A,&ad);CHKERRQ(ierr); 4229 ierr = MatSeqAIJGetArrayWrite(Aij->B,&ao);CHKERRQ(ierr); 4230 cstart = mat->cmap->rstart; 4231 if (!Aij->ld) { 4232 /* count number of entries below block diagonal */ 4233 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 4234 Aij->ld = ld; 4235 for (i=0; i<m; i++) { 4236 nnz = Ii[i+1]- Ii[i]; 4237 j = 0; 4238 while (J[j] < cstart && j < nnz) {j++;} 4239 J += nnz; 4240 ld[i] = j; 4241 } 4242 } else { 4243 ld = Aij->ld; 4244 } 4245 4246 for (i=0; i<m; i++) { 4247 nnz = Ii[i+1]- Ii[i]; 4248 Iii = Ii[i]; 4249 ldi = ld[i]; 4250 md = Adi[i+1]-Adi[i]; 4251 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr); 4252 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr); 4253 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr); 4254 ad += md; 4255 ao += nnz - md; 4256 } 4257 nooffprocentries = mat->nooffprocentries; 4258 mat->nooffprocentries = PETSC_TRUE; 4259 ierr = MatSeqAIJRestoreArrayWrite(Aij->A,&ad);CHKERRQ(ierr); 4260 ierr = MatSeqAIJRestoreArrayWrite(Aij->B,&ao);CHKERRQ(ierr); 4261 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr); 4262 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr); 4263 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 4264 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4265 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4266 mat->nooffprocentries = nooffprocentries; 4267 PetscFunctionReturn(0); 4268 } 4269 4270 /*@C 4271 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4272 (the default parallel PETSc format). For good matrix assembly performance 4273 the user should preallocate the matrix storage by setting the parameters 4274 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4275 performance can be increased by more than a factor of 50. 4276 4277 Collective 4278 4279 Input Parameters: 4280 + comm - MPI communicator 4281 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4282 This value should be the same as the local size used in creating the 4283 y vector for the matrix-vector product y = Ax. 4284 . 
4284 .  n - This value should be the same as the local size used in creating the
4285        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4286        calculated if N is given) For square matrices n is almost always m.
4287 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4288 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4289 .  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
4290        (same value is used for all local rows)
4291 .  d_nnz - array containing the number of nonzeros in the various rows of the
4292        DIAGONAL portion of the local submatrix (possibly different for each row)
4293        or NULL, if d_nz is used to specify the nonzero structure.
4294        The size of this array is equal to the number of local rows, i.e., 'm'.
4295 .  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
4296        submatrix (same value is used for all local rows).
4297 -  o_nnz - array containing the number of nonzeros in the various rows of the
4298        OFF-DIAGONAL portion of the local submatrix (possibly different for
4299        each row) or NULL, if o_nz is used to specify the nonzero
4300        structure. The size of this array is equal to the number
4301        of local rows, i.e., 'm'.
4302
4303    Output Parameter:
4304 .  A - the matrix
4305
4306    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4307    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4308    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4309
4310    Notes:
4311    If the *_nnz parameter is given then the *_nz parameter is ignored.
4312
4313    The m,n,M,N parameters specify the size of the matrix and its partitioning across
4314    processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4315    storage requirements for this matrix.
4316
4317    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4318    processor then it must be used on all processors that share the object for
4319    that argument.
4320
4321    The user MUST specify either the local or global matrix dimensions
4322    (possibly both).
4323
4324    The parallel matrix is partitioned across processors such that the
4325    first m0 rows belong to process 0, the next m1 rows belong to
4326    process 1, the next m2 rows belong to process 2, etc., where
4327    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4328    values corresponding to an [m x N] submatrix.
4329
4330    The columns are logically partitioned with the n0 columns belonging
4331    to the 0th partition, the next n1 columns belonging to the next
4332    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4333
4334    The DIAGONAL portion of the local submatrix on any given processor
4335    is the submatrix of the rows and columns (of sizes m and n)
4336    owned by the given processor, i.e., the diagonal submatrix on
4337    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4338    etc. The remaining [m x (N-n)] portion of the local submatrix
4339    constitutes the OFF-DIAGONAL portion. The example below better
4340    illustrates this concept.
4341
4342    For a square global matrix we define each processor's diagonal portion
4343    to be its local rows and the corresponding columns (a square submatrix);
4344    each processor's off-diagonal portion encompasses the remainder of the
4345    local matrix (a rectangular submatrix).
4346
4347    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
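   For instance, when per-row counts are available, a call could look as follows (an illustrative sketch only; comm, m, n, A, ierr, and the caller-filled arrays d_nnz and o_nnz of length m are assumed to be declared by the user):
.vb
      ierr = MatCreateAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve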
4348
4349    When calling this routine with a single process communicator, a matrix of
4350    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4351    type of communicator, use the construction mechanism
4352 .vb
4353    MatCreate(...,&A);
       MatSetType(A,MATMPIAIJ);
       MatSetSizes(A, m,n,M,N);
       MatMPIAIJSetPreallocation(A,...);
4354 .ve
4360
4361    By default, this format uses inodes (identical nodes) when possible.
4362    We search for consecutive rows with the same nonzero structure, thereby
4363    reusing matrix information to achieve increased efficiency.
4364
4365    Options Database Keys:
4366 +  -mat_no_inode  - Do not use inodes
4367 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4368 -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices.
4369         See viewer types in manual of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter to be viewed as a matrix.
4370         Entry (i,j) is the size of the message (in bytes) rank i sends to rank j in one MatMult() call.
4371
4372    Example usage:
4373
4374    Consider the following 8x8 matrix with 34 non-zero values, that is
4375    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4376    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4377    as follows
4378
4379 .vb
4380             1  2  0  |  0  3  0  |  0  4
4381    Proc0    0  5  6  |  7  0  0  |  8  0
4382             9  0 10  | 11  0  0  | 12  0
4383    -------------------------------------
4384            13  0 14  | 15 16 17  |  0  0
4385    Proc1    0 18  0  | 19 20 21  |  0  0
4386             0  0  0  | 22 23  0  | 24  0
4387    -------------------------------------
4388    Proc2   25 26 27  |  0  0 28  | 29  0
4389            30  0  0  | 31 32 33  |  0 34
4390 .ve
4391
4392    This can be represented as a collection of submatrices as
4393
4394 .vb
4395       A B C
4396       D E F
4397       G H I
4398 .ve
4399
4400    where the submatrices A,B,C are owned by proc0, D,E,F are
4401    owned by proc1, and G,H,I are owned by proc2.
4402
4403    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4404    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4405    The 'M','N' parameters are 8,8, and have the same values on all procs.
4406
4407    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4408    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4409    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4410    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4411    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
4412    matrix, and [DF] as another SeqAIJ matrix.
4413
4414    When the d_nz, o_nz parameters are specified, d_nz storage elements are
4415    allocated for every row of the local DIAGONAL submatrix, and o_nz
4416    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4417    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4418    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4419    In this case, the values of d_nz,o_nz are
4420 .vb
4421      proc0 : d_nz = 2, o_nz = 2
4422      proc1 : d_nz = 3, o_nz = 2
4423      proc2 : d_nz = 1, o_nz = 4
4424 .ve
4425    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4426    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4427    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4428    34 values.
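   As an illustrative sketch (not part of an actual example program; comm, A, and ierr are assumed to be declared by the caller), the call on proc0 matching the d_nz/o_nz values above would be
.vb
      ierr = MatCreateAIJ(comm,3,3,8,8,2,NULL,2,NULL,&A);CHKERRQ(ierr);
.ve
   with d_nz,o_nz being 3,2 on proc1 and 1,4 on proc2.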
4429
4430    When the d_nnz, o_nnz parameters are specified, the storage is specified
4431    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4432    In the above case the values for d_nnz,o_nnz are
4433 .vb
4434      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4435      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4436      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4437 .ve
4438    Here the space allocated is the sum of all the above values, i.e., 34, and
4439    hence the preallocation is exact.
4440
4441    Level: intermediate
4442
4443 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4444           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4445 @*/
4446 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4447 {
4448   PetscErrorCode ierr;
4449   PetscMPIInt    size;
4450
4451   PetscFunctionBegin;
4452   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4453   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4454   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4455   if (size > 1) {
4456     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4457     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4458   } else {
4459     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4460     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4461   }
4462   PetscFunctionReturn(0);
4463 }
4464
4465 /*@C
4466    MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4467
4468    Not collective
4469
4470    Input Parameter:
4471 .  A - The MPIAIJ matrix
4472
4473    Output Parameters:
4474 +  Ad - The local diagonal block as a SeqAIJ matrix
4475 .  Ao - The local off-diagonal block as a SeqAIJ matrix
4476 -  colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4477
4478    Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4479    in Ad are in [0, Nc), where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
4480    the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4481    local column numbers to global column numbers in the original matrix.
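   A typical access pattern is sketched below (A is assumed to be an assembled MATMPIAIJ matrix; the returned Ad, Ao, and colmap point to data internal to A and must not be destroyed or freed by the caller):
.vb
      Mat            Ad,Ao;
      const PetscInt *colmap;
      ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
.ve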
4482 4483 Level: intermediate 4484 4485 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ 4486 @*/ 4487 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4488 { 4489 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4490 PetscBool flg; 4491 PetscErrorCode ierr; 4492 4493 PetscFunctionBegin; 4494 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4495 PetscCheckFalse(!flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4496 if (Ad) *Ad = a->A; 4497 if (Ao) *Ao = a->B; 4498 if (colmap) *colmap = a->garray; 4499 PetscFunctionReturn(0); 4500 } 4501 4502 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4503 { 4504 PetscErrorCode ierr; 4505 PetscInt m,N,i,rstart,nnz,Ii; 4506 PetscInt *indx; 4507 PetscScalar *values; 4508 MatType rootType; 4509 4510 PetscFunctionBegin; 4511 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4512 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4513 PetscInt *dnz,*onz,sum,bs,cbs; 4514 4515 if (n == PETSC_DECIDE) { 4516 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4517 } 4518 /* Check sum(n) = N */ 4519 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 4520 PetscCheckFalse(sum != N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N); 4521 4522 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 4523 rstart -= m; 4524 4525 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4526 for (i=0; i<m; i++) { 4527 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4528 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4529 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4530 } 4531 4532 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4533 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4534 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4535 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4536 ierr = MatGetRootType_Private(inmat,&rootType);CHKERRQ(ierr); 4537 ierr = MatSetType(*outmat,rootType);CHKERRQ(ierr); 4538 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4539 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4540 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4541 ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 4542 } 4543 4544 /* numeric phase */ 4545 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4546 for (i=0; i<m; i++) { 4547 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4548 Ii = i + rstart; 4549 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4550 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4551 } 4552 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4553 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4554 PetscFunctionReturn(0); 4555 } 4556 4557 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4558 { 4559 PetscErrorCode ierr; 4560 PetscMPIInt rank; 4561 PetscInt m,N,i,rstart,nnz; 4562 size_t len; 4563 const PetscInt *indx; 4564 PetscViewer out; 4565 char *name; 4566 Mat B; 4567 const PetscScalar *values; 4568 4569 PetscFunctionBegin; 4570 ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr); 4571 ierr = 
MatGetSize(A,NULL,&N);CHKERRQ(ierr); 4572 /* Should this be the type of the diagonal block of A? */ 4573 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4574 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4575 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4576 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4577 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4578 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 4579 for (i=0; i<m; i++) { 4580 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4581 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4582 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4583 } 4584 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4585 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4586 4587 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr); 4588 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4589 ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr); 4590 ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr); 4591 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4592 ierr = PetscFree(name);CHKERRQ(ierr); 4593 ierr = MatView(B,out);CHKERRQ(ierr); 4594 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4595 ierr = MatDestroy(&B);CHKERRQ(ierr); 4596 PetscFunctionReturn(0); 4597 } 4598 4599 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4600 { 4601 PetscErrorCode ierr; 4602 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4603 4604 PetscFunctionBegin; 4605 if (!merge) PetscFunctionReturn(0); 4606 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4607 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4608 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4609 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4610 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4611 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4612 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4613 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4614 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4615 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4616 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4617 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4618 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4619 ierr = PetscFree(merge);CHKERRQ(ierr); 4620 PetscFunctionReturn(0); 4621 } 4622 4623 #include <../src/mat/utils/freespace.h> 4624 #include <petscbt.h> 4625 4626 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4627 { 4628 PetscErrorCode ierr; 4629 MPI_Comm comm; 4630 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4631 PetscMPIInt size,rank,taga,*len_s; 4632 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4633 PetscInt proc,m; 4634 PetscInt **buf_ri,**buf_rj; 4635 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4636 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4637 MPI_Request *s_waits,*r_waits; 4638 MPI_Status *status; 4639 const MatScalar *aa,*a_a; 4640 MatScalar **abuf_r,*ba_i; 4641 Mat_Merge_SeqsToMPI *merge; 4642 PetscContainer container; 4643 4644 PetscFunctionBegin; 4645 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4646 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4647 4648 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4649 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 4650 4651 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4652 
PetscCheckFalse(!container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4653 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4654 ierr = MatSeqAIJGetArrayRead(seqmat,&a_a);CHKERRQ(ierr); 4655 aa = a_a; 4656 4657 bi = merge->bi; 4658 bj = merge->bj; 4659 buf_ri = merge->buf_ri; 4660 buf_rj = merge->buf_rj; 4661 4662 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4663 owners = merge->rowmap->range; 4664 len_s = merge->len_s; 4665 4666 /* send and recv matrix values */ 4667 /*-----------------------------*/ 4668 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4669 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4670 4671 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4672 for (proc=0,k=0; proc<size; proc++) { 4673 if (!len_s[proc]) continue; 4674 i = owners[proc]; 4675 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRMPI(ierr); 4676 k++; 4677 } 4678 4679 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRMPI(ierr);} 4680 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRMPI(ierr);} 4681 ierr = PetscFree(status);CHKERRQ(ierr); 4682 4683 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4684 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4685 4686 /* insert mat values of mpimat */ 4687 /*----------------------------*/ 4688 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4689 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4690 4691 for (k=0; k<merge->nrecv; k++) { 4692 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4693 nrows = *(buf_ri_k[k]); 4694 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4695 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4696 } 4697 4698 /* set values of ba */ 4699 m = merge->rowmap->n; 4700 for (i=0; i<m; i++) { 4701 arow = owners[rank] + i; 4702 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4703 bnzi = bi[i+1] - bi[i]; 4704 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4705 4706 /* add local non-zero vals of this proc's seqmat into ba */ 4707 anzi = ai[arow+1] - ai[arow]; 4708 aj = a->j + ai[arow]; 4709 aa = a_a + ai[arow]; 4710 nextaj = 0; 4711 for (j=0; nextaj<anzi; j++) { 4712 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4713 ba_i[j] += aa[nextaj++]; 4714 } 4715 } 4716 4717 /* add received vals into ba */ 4718 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4719 /* i-th row */ 4720 if (i == *nextrow[k]) { 4721 anzi = *(nextai[k]+1) - *nextai[k]; 4722 aj = buf_rj[k] + *(nextai[k]); 4723 aa = abuf_r[k] + *(nextai[k]); 4724 nextaj = 0; 4725 for (j=0; nextaj<anzi; j++) { 4726 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4727 ba_i[j] += aa[nextaj++]; 4728 } 4729 } 4730 nextrow[k]++; nextai[k]++; 4731 } 4732 } 4733 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4734 } 4735 ierr = MatSeqAIJRestoreArrayRead(seqmat,&a_a);CHKERRQ(ierr); 4736 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4737 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4738 4739 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4740 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4741 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4742 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4743 ierr = 
PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4744 PetscFunctionReturn(0); 4745 } 4746 4747 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4748 { 4749 PetscErrorCode ierr; 4750 Mat B_mpi; 4751 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4752 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4753 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4754 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4755 PetscInt len,proc,*dnz,*onz,bs,cbs; 4756 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4757 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4758 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4759 MPI_Status *status; 4760 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4761 PetscBT lnkbt; 4762 Mat_Merge_SeqsToMPI *merge; 4763 PetscContainer container; 4764 4765 PetscFunctionBegin; 4766 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4767 4768 /* make sure it is a PETSc comm */ 4769 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4770 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4771 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 4772 4773 ierr = PetscNew(&merge);CHKERRQ(ierr); 4774 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4775 4776 /* determine row ownership */ 4777 /*---------------------------------------------------------*/ 4778 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4779 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4780 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4781 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4782 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4783 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4784 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4785 4786 m = merge->rowmap->n; 4787 owners = merge->rowmap->range; 4788 4789 /* determine the number of messages to send, their lengths */ 4790 /*---------------------------------------------------------*/ 4791 len_s = merge->len_s; 4792 4793 len = 0; /* length of buf_si[] */ 4794 merge->nsend = 0; 4795 for (proc=0; proc<size; proc++) { 4796 len_si[proc] = 0; 4797 if (proc == rank) { 4798 len_s[proc] = 0; 4799 } else { 4800 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4801 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4802 } 4803 if (len_s[proc]) { 4804 merge->nsend++; 4805 nrows = 0; 4806 for (i=owners[proc]; i<owners[proc+1]; i++) { 4807 if (ai[i+1] > ai[i]) nrows++; 4808 } 4809 len_si[proc] = 2*(nrows+1); 4810 len += len_si[proc]; 4811 } 4812 } 4813 4814 /* determine the number and length of messages to receive for ij-structure */ 4815 /*-------------------------------------------------------------------------*/ 4816 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4817 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4818 4819 /* post the Irecv of j-structure */ 4820 /*-------------------------------*/ 4821 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4822 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4823 4824 /* post the Isend of j-structure */ 4825 /*--------------------------------*/ 4826 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4827 4828 for (proc=0, k=0; proc<size; proc++) { 4829 
if (!len_s[proc]) continue; 4830 i = owners[proc]; 4831 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRMPI(ierr); 4832 k++; 4833 } 4834 4835 /* receives and sends of j-structure are complete */ 4836 /*------------------------------------------------*/ 4837 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRMPI(ierr);} 4838 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRMPI(ierr);} 4839 4840 /* send and recv i-structure */ 4841 /*---------------------------*/ 4842 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4843 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4844 4845 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4846 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4847 for (proc=0,k=0; proc<size; proc++) { 4848 if (!len_s[proc]) continue; 4849 /* form outgoing message for i-structure: 4850 buf_si[0]: nrows to be sent 4851 [1:nrows]: row index (global) 4852 [nrows+1:2*nrows+1]: i-structure index 4853 */ 4854 /*-------------------------------------------*/ 4855 nrows = len_si[proc]/2 - 1; 4856 buf_si_i = buf_si + nrows+1; 4857 buf_si[0] = nrows; 4858 buf_si_i[0] = 0; 4859 nrows = 0; 4860 for (i=owners[proc]; i<owners[proc+1]; i++) { 4861 anzi = ai[i+1] - ai[i]; 4862 if (anzi) { 4863 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4864 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4865 nrows++; 4866 } 4867 } 4868 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRMPI(ierr); 4869 k++; 4870 buf_si += len_si[proc]; 4871 } 4872 4873 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRMPI(ierr);} 4874 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRMPI(ierr);} 4875 4876 ierr = PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4877 for (i=0; i<merge->nrecv; i++) { 4878 ierr = PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4879 } 4880 4881 ierr = PetscFree(len_si);CHKERRQ(ierr); 4882 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4883 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4884 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4885 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4886 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4887 ierr = PetscFree(status);CHKERRQ(ierr); 4888 4889 /* compute a local seq matrix in each processor */ 4890 /*----------------------------------------------*/ 4891 /* allocate bi array and free space for accumulating nonzero column info */ 4892 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4893 bi[0] = 0; 4894 4895 /* create and initialize a linked list */ 4896 nlnk = N+1; 4897 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4898 4899 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4900 len = ai[owners[rank+1]] - ai[owners[rank]]; 4901 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4902 4903 current_space = free_space; 4904 4905 /* determine symbolic info for each local row */ 4906 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4907 4908 for (k=0; k<merge->nrecv; k++) { 4909 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4910 nrows = *buf_ri_k[k]; 4911 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4912 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next 
i-structure of k-th recved i-structure */ 4913 } 4914 4915 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4916 len = 0; 4917 for (i=0; i<m; i++) { 4918 bnzi = 0; 4919 /* add local non-zero cols of this proc's seqmat into lnk */ 4920 arow = owners[rank] + i; 4921 anzi = ai[arow+1] - ai[arow]; 4922 aj = a->j + ai[arow]; 4923 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4924 bnzi += nlnk; 4925 /* add received col data into lnk */ 4926 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4927 if (i == *nextrow[k]) { /* i-th row */ 4928 anzi = *(nextai[k]+1) - *nextai[k]; 4929 aj = buf_rj[k] + *nextai[k]; 4930 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4931 bnzi += nlnk; 4932 nextrow[k]++; nextai[k]++; 4933 } 4934 } 4935 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4936 4937 /* if free space is not available, make more free space */ 4938 if (current_space->local_remaining<bnzi) { 4939 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4940 nspacedouble++; 4941 } 4942 /* copy data into free space, then initialize lnk */ 4943 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4944 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4945 4946 current_space->array += bnzi; 4947 current_space->local_used += bnzi; 4948 current_space->local_remaining -= bnzi; 4949 4950 bi[i+1] = bi[i] + bnzi; 4951 } 4952 4953 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4954 4955 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4956 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4957 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4958 4959 /* create symbolic parallel matrix B_mpi */ 4960 /*---------------------------------------*/ 4961 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4962 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4963 if (n==PETSC_DECIDE) { 4964 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4965 } else { 4966 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4967 } 4968 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4969 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4970 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4971 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4972 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4973 4974 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4975 B_mpi->assembled = PETSC_FALSE; 4976 merge->bi = bi; 4977 merge->bj = bj; 4978 merge->buf_ri = buf_ri; 4979 merge->buf_rj = buf_rj; 4980 merge->coi = NULL; 4981 merge->coj = NULL; 4982 merge->owners_co = NULL; 4983 4984 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4985 4986 /* attach the supporting struct to B_mpi for reuse */ 4987 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4988 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4989 ierr = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr); 4990 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4991 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4992 *mpimat = B_mpi; 4993 4994 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4995 PetscFunctionReturn(0); 4996 } 4997 4998 /*@C 4999 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by 
adding the sequential
5000    matrices from each processor
5001
5002    Collective
5003
5004    Input Parameters:
5005 +  comm - the communicator the parallel matrix will live on
5006 .  seqmat - the input sequential matrix
5007 .  m - number of local rows (or PETSC_DECIDE)
5008 .  n - number of local columns (or PETSC_DECIDE)
5009 -  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5010
5011    Output Parameter:
5012 .  mpimat - the parallel matrix generated
5013
5014    Level: advanced
5015
5016    Notes:
5017    The dimensions of the sequential matrix in each processor MUST be the same.
5018    The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
5019    destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
5020 @*/
5021 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5022 {
5023   PetscErrorCode ierr;
5024   PetscMPIInt    size;
5025
5026   PetscFunctionBegin;
5027   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
5028   if (size == 1) {
5029     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5030     if (scall == MAT_INITIAL_MATRIX) {
5031       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5032     } else {
5033       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5034     }
5035     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5036     PetscFunctionReturn(0);
5037   }
5038   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5039   if (scall == MAT_INITIAL_MATRIX) {
5040     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5041   }
5042   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5043   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5044   PetscFunctionReturn(0);
5045 }
5046
5047 /*@
5048     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5049           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5050           with MatGetSize().
5051
5052     Not Collective
5053
5054    Input Parameters:
5055 +  A - the matrix
5056 -  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5057
5058    Output Parameter:
5059 .  A_loc - the local sequential matrix generated
5060
5061     Level: developer
5062
5063    Notes:
5064      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5065      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5066      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5067      modify the values of the returned A_loc.
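     For example (a sketch only; A is assumed to be an assembled parallel AIJ matrix and ierr a declared error code):
.vb
      Mat A_loc;
      ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
      /* ... use A_loc ... */
      ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);  /* refresh the values after A has changed */
      ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve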
5068 5069 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge() 5070 @*/ 5071 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5072 { 5073 PetscErrorCode ierr; 5074 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5075 Mat_SeqAIJ *mat,*a,*b; 5076 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5077 const PetscScalar *aa,*ba,*aav,*bav; 5078 PetscScalar *ca,*cam; 5079 PetscMPIInt size; 5080 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5081 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5082 PetscBool match; 5083 5084 PetscFunctionBegin; 5085 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5086 PetscCheckFalse(!match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5087 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 5088 if (size == 1) { 5089 if (scall == MAT_INITIAL_MATRIX) { 5090 ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr); 5091 *A_loc = mpimat->A; 5092 } else if (scall == MAT_REUSE_MATRIX) { 5093 ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5094 } 5095 PetscFunctionReturn(0); 5096 } 5097 5098 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5099 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5100 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5101 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5102 ierr = MatSeqAIJGetArrayRead(mpimat->A,&aav);CHKERRQ(ierr); 5103 ierr = MatSeqAIJGetArrayRead(mpimat->B,&bav);CHKERRQ(ierr); 5104 aa = aav; 5105 ba = bav; 5106 if (scall == MAT_INITIAL_MATRIX) { 5107 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5108 ci[0] = 0; 5109 for (i=0; i<am; i++) { 5110 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5111 } 5112 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5113 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5114 k = 0; 5115 for (i=0; i<am; i++) { 5116 ncols_o = bi[i+1] - bi[i]; 5117 ncols_d = ai[i+1] - ai[i]; 5118 /* off-diagonal portion of A */ 5119 for (jo=0; jo<ncols_o; jo++) { 5120 col = cmap[*bj]; 5121 if (col >= cstart) break; 5122 cj[k] = col; bj++; 5123 ca[k++] = *ba++; 5124 } 5125 /* diagonal portion of A */ 5126 for (j=0; j<ncols_d; j++) { 5127 cj[k] = cstart + *aj++; 5128 ca[k++] = *aa++; 5129 } 5130 /* off-diagonal portion of A */ 5131 for (j=jo; j<ncols_o; j++) { 5132 cj[k] = cmap[*bj++]; 5133 ca[k++] = *ba++; 5134 } 5135 } 5136 /* put together the new matrix */ 5137 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5138 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5139 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5140 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5141 mat->free_a = PETSC_TRUE; 5142 mat->free_ij = PETSC_TRUE; 5143 mat->nonew = 0; 5144 } else if (scall == MAT_REUSE_MATRIX) { 5145 mat =(Mat_SeqAIJ*)(*A_loc)->data; 5146 ci = mat->i; 5147 cj = mat->j; 5148 ierr = MatSeqAIJGetArrayWrite(*A_loc,&cam);CHKERRQ(ierr); 5149 for (i=0; i<am; i++) { 5150 /* off-diagonal portion of A */ 5151 ncols_o = bi[i+1] - bi[i]; 5152 for (jo=0; jo<ncols_o; jo++) { 5153 col = cmap[*bj]; 5154 if (col >= cstart) break; 5155 *cam++ = *ba++; bj++; 5156 } 5157 /* diagonal portion of A */ 5158 ncols_d = ai[i+1] - ai[i]; 5159 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5160 /* off-diagonal portion of A */ 5161 for (j=jo; j<ncols_o; j++) { 5162 *cam++ = *ba++; bj++; 5163 } 5164 } 5165 ierr = MatSeqAIJRestoreArrayWrite(*A_loc,&cam);CHKERRQ(ierr); 5166 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5167 ierr = MatSeqAIJRestoreArrayRead(mpimat->A,&aav);CHKERRQ(ierr); 5168 ierr = MatSeqAIJRestoreArrayRead(mpimat->B,&bav);CHKERRQ(ierr); 5169 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5170 PetscFunctionReturn(0); 5171 } 5172 5173 /*@ 5174 MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5175 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part 5176 5177 Not Collective 5178 5179 Input Parameters: 5180 + A - the matrix 5181 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5182 5183 Output Parameters: 5184 + glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5185 - A_loc - the local sequential matrix generated 5186 5187 Level: developer 5188 5189 Notes: 5190 This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering) 5191 5192 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed() 5193 5194 @*/ 5195 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc) 5196 { 5197 PetscErrorCode ierr; 5198 Mat Ao,Ad; 5199 const PetscInt *cmap; 5200 PetscMPIInt size; 5201 PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*); 5202 5203 PetscFunctionBegin; 5204 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap);CHKERRQ(ierr); 5205 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 5206 if (size == 1) { 5207 if (scall == MAT_INITIAL_MATRIX) { 5208 ierr = PetscObjectReference((PetscObject)Ad);CHKERRQ(ierr); 5209 *A_loc = Ad; 5210 } else if (scall == MAT_REUSE_MATRIX) { 5211 ierr = MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5212 } 5213 if (glob) { ierr = ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob);CHKERRQ(ierr); } 5214 PetscFunctionReturn(0); 5215 } 5216 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f);CHKERRQ(ierr); 5217 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5218 if (f) { 5219 ierr = (*f)(A,scall,glob,A_loc);CHKERRQ(ierr); 5220 } else { 5221 Mat_SeqAIJ *a = (Mat_SeqAIJ*)Ad->data; 5222 Mat_SeqAIJ *b = (Mat_SeqAIJ*)Ao->data; 5223 Mat_SeqAIJ *c; 5224 PetscInt *ai = a->i, *aj = a->j; 5225 PetscInt *bi = b->i, *bj = b->j; 5226 PetscInt *ci,*cj; 5227 const PetscScalar *aa,*ba; 5228 PetscScalar *ca; 5229 PetscInt i,j,am,dn,on; 5230 5231 ierr = 
MatGetLocalSize(Ad,&am,&dn);CHKERRQ(ierr); 5232 ierr = MatGetLocalSize(Ao,NULL,&on);CHKERRQ(ierr); 5233 ierr = MatSeqAIJGetArrayRead(Ad,&aa);CHKERRQ(ierr); 5234 ierr = MatSeqAIJGetArrayRead(Ao,&ba);CHKERRQ(ierr); 5235 if (scall == MAT_INITIAL_MATRIX) { 5236 PetscInt k; 5237 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5238 ierr = PetscMalloc1(ai[am]+bi[am],&cj);CHKERRQ(ierr); 5239 ierr = PetscMalloc1(ai[am]+bi[am],&ca);CHKERRQ(ierr); 5240 ci[0] = 0; 5241 for (i=0,k=0; i<am; i++) { 5242 const PetscInt ncols_o = bi[i+1] - bi[i]; 5243 const PetscInt ncols_d = ai[i+1] - ai[i]; 5244 ci[i+1] = ci[i] + ncols_o + ncols_d; 5245 /* diagonal portion of A */ 5246 for (j=0; j<ncols_d; j++,k++) { 5247 cj[k] = *aj++; 5248 ca[k] = *aa++; 5249 } 5250 /* off-diagonal portion of A */ 5251 for (j=0; j<ncols_o; j++,k++) { 5252 cj[k] = dn + *bj++; 5253 ca[k] = *ba++; 5254 } 5255 } 5256 /* put together the new matrix */ 5257 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc);CHKERRQ(ierr); 5258 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5259 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5260 c = (Mat_SeqAIJ*)(*A_loc)->data; 5261 c->free_a = PETSC_TRUE; 5262 c->free_ij = PETSC_TRUE; 5263 c->nonew = 0; 5264 ierr = MatSetType(*A_loc,((PetscObject)Ad)->type_name);CHKERRQ(ierr); 5265 } else if (scall == MAT_REUSE_MATRIX) { 5266 ierr = MatSeqAIJGetArrayWrite(*A_loc,&ca);CHKERRQ(ierr); 5267 for (i=0; i<am; i++) { 5268 const PetscInt ncols_d = ai[i+1] - ai[i]; 5269 const PetscInt ncols_o = bi[i+1] - bi[i]; 5270 /* diagonal portion of A */ 5271 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 5272 /* off-diagonal portion of A */ 5273 for (j=0; j<ncols_o; j++) *ca++ = *ba++; 5274 } 5275 ierr = MatSeqAIJRestoreArrayWrite(*A_loc,&ca);CHKERRQ(ierr); 5276 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5277 ierr = MatSeqAIJRestoreArrayRead(Ad,&aa);CHKERRQ(ierr); 5278 ierr = MatSeqAIJRestoreArrayRead(Ao,&aa);CHKERRQ(ierr); 5279 if (glob) { 5280 PetscInt cst, *gidx; 5281 5282 ierr = MatGetOwnershipRangeColumn(A,&cst,NULL);CHKERRQ(ierr); 5283 ierr = PetscMalloc1(dn+on,&gidx);CHKERRQ(ierr); 5284 for (i=0; i<dn; i++) gidx[i] = cst + i; 5285 for (i=0; i<on; i++) gidx[i+dn] = cmap[i]; 5286 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob);CHKERRQ(ierr); 5287 } 5288 } 5289 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5290 PetscFunctionReturn(0); 5291 } 5292 5293 /*@C 5294 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5295 5296 Not Collective 5297 5298 Input Parameters: 5299 + A - the matrix 5300 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5301 - row, col - index sets of rows and columns to extract (or NULL) 5302 5303 Output Parameter: 5304 . 
A_loc - the local sequential matrix generated 5305 5306 Level: developer 5307 5308 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5309 5310 @*/ 5311 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5312 { 5313 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5314 PetscErrorCode ierr; 5315 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5316 IS isrowa,iscola; 5317 Mat *aloc; 5318 PetscBool match; 5319 5320 PetscFunctionBegin; 5321 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5322 PetscCheckFalse(!match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5323 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5324 if (!row) { 5325 start = A->rmap->rstart; end = A->rmap->rend; 5326 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5327 } else { 5328 isrowa = *row; 5329 } 5330 if (!col) { 5331 start = A->cmap->rstart; 5332 cmap = a->garray; 5333 nzA = a->A->cmap->n; 5334 nzB = a->B->cmap->n; 5335 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5336 ncols = 0; 5337 for (i=0; i<nzB; i++) { 5338 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5339 else break; 5340 } 5341 imark = i; 5342 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5343 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5344 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5345 } else { 5346 iscola = *col; 5347 } 5348 if (scall != MAT_INITIAL_MATRIX) { 5349 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5350 aloc[0] = *A_loc; 5351 } 5352 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5353 if (!col) { /* attach global id of condensed columns */ 5354 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5355 } 5356 *A_loc = aloc[0]; 5357 ierr = PetscFree(aloc);CHKERRQ(ierr); 5358 if (!row) { 5359 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5360 } 5361 if (!col) { 5362 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5363 } 5364 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5365 PetscFunctionReturn(0); 5366 } 5367 5368 /* 5369 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5370 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5371 * on a global size. 
5372 * */ 5373 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5374 { 5375 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5376 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5377 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5378 PetscMPIInt owner; 5379 PetscSFNode *iremote,*oiremote; 5380 const PetscInt *lrowindices; 5381 PetscErrorCode ierr; 5382 PetscSF sf,osf; 5383 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5384 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5385 MPI_Comm comm; 5386 ISLocalToGlobalMapping mapping; 5387 const PetscScalar *pd_a,*po_a; 5388 5389 PetscFunctionBegin; 5390 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr); 5391 /* plocalsize is the number of roots 5392 * nrows is the number of leaves 5393 * */ 5394 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr); 5395 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr); 5396 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr); 5397 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr); 5398 for (i=0;i<nrows;i++) { 5399 /* Find a remote index and an owner for a row 5400 * The row could be local or remote 5401 * */ 5402 owner = 0; 5403 lidx = 0; 5404 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr); 5405 iremote[i].index = lidx; 5406 iremote[i].rank = owner; 5407 } 5408 /* Create SF to communicate how many nonzero columns for each row */ 5409 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5410 /* SF will figure out the number of nonzero colunms for each row, and their 5411 * offsets 5412 * */ 5413 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5414 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5415 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5416 5417 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); 5418 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); 5419 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); 5420 roffsets[0] = 0; 5421 roffsets[1] = 0; 5422 for (i=0;i<plocalsize;i++) { 5423 /* diag */ 5424 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5425 /* off diag */ 5426 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5427 /* compute offsets so that we relative location for each row */ 5428 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5429 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5430 } 5431 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr); 5432 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr); 5433 /* 'r' means root, and 'l' means leaf */ 5434 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr); 5435 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr); 5436 ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr); 5437 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr); 5438 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5439 ierr = PetscFree(roffsets);CHKERRQ(ierr); 5440 ierr = PetscFree(nrcols);CHKERRQ(ierr); 5441 dntotalcols = 0; 5442 ontotalcols = 0; 5443 ncol = 0; 5444 for (i=0;i<nrows;i++) { 5445 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5446 ncol = PetscMax(pnnz[i],ncol); 5447 /* diag */ 5448 dntotalcols += nlcols[i*2+0]; 5449 /* off diag */ 5450 ontotalcols += nlcols[i*2+1]; 5451 } 5452 /* We do not need to figure the right number of columns 5453 * since all the calculations will be done by going through the raw data 5454 * */ 5455 ierr = 
MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr); 5456 ierr = MatSetUp(*P_oth);CHKERRQ(ierr); 5457 ierr = PetscFree(pnnz);CHKERRQ(ierr); 5458 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5459 /* diag */ 5460 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr); 5461 /* off diag */ 5462 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr); 5463 /* diag */ 5464 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr); 5465 /* off diag */ 5466 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr); 5467 dntotalcols = 0; 5468 ontotalcols = 0; 5469 ntotalcols = 0; 5470 for (i=0;i<nrows;i++) { 5471 owner = 0; 5472 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr); 5473 /* Set iremote for diag matrix */ 5474 for (j=0;j<nlcols[i*2+0];j++) { 5475 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5476 iremote[dntotalcols].rank = owner; 5477 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5478 ilocal[dntotalcols++] = ntotalcols++; 5479 } 5480 /* off diag */ 5481 for (j=0;j<nlcols[i*2+1];j++) { 5482 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5483 oiremote[ontotalcols].rank = owner; 5484 oilocal[ontotalcols++] = ntotalcols++; 5485 } 5486 } 5487 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr); 5488 ierr = PetscFree(loffsets);CHKERRQ(ierr); 5489 ierr = PetscFree(nlcols);CHKERRQ(ierr); 5490 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5491 /* P serves as roots and P_oth is leaves 5492 * Diag matrix 5493 * */ 5494 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5495 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5496 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5497 5498 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr); 5499 /* Off diag */ 5500 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5501 ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr); 5502 ierr = PetscSFSetUp(osf);CHKERRQ(ierr); 5503 ierr = MatSeqAIJGetArrayRead(p->A,&pd_a);CHKERRQ(ierr); 5504 ierr = MatSeqAIJGetArrayRead(p->B,&po_a);CHKERRQ(ierr); 5505 /* We operate on the matrix internal data for saving memory */ 5506 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5507 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5508 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr); 5509 /* Convert to global indices for diag matrix */ 5510 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5511 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5512 /* We want P_oth store global indices */ 5513 ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr); 5514 /* Use memory scalable approach */ 5515 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr); 5516 ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); 5517 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5518 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5519 /* Convert back to local indices */ 5520 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5521 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5522 nout = 0; 5523 ierr = 
ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr); 5524 PetscCheckFalse(nout != po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ",po->i[plocalsize],nout); 5525 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 5526 /* Exchange values */ 5527 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5528 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5529 ierr = MatSeqAIJRestoreArrayRead(p->A,&pd_a);CHKERRQ(ierr); 5530 ierr = MatSeqAIJRestoreArrayRead(p->B,&po_a);CHKERRQ(ierr); 5531 /* Stop PETSc from shrinking memory */ 5532 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5533 ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5534 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5535 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5536 ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5537 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5538 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5539 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5540 PetscFunctionReturn(0); 5541 } 5542 5543 /* 5544 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5545 * This supports MPIAIJ and MAIJ 5546 * */ 5547 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5548 { 5549 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5550 Mat_SeqAIJ *p_oth; 5551 IS rows,map; 5552 PetscHMapI hamp; 5553 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5554 MPI_Comm comm; 5555 PetscSF sf,osf; 5556 PetscBool has; 5557 PetscErrorCode ierr; 5558 5559 PetscFunctionBegin; 5560 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5561 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5562 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5563 * and then create a submatrix (that often is an overlapping matrix) 5564 * */ 5565 if (reuse == MAT_INITIAL_MATRIX) { 5566 /* Use a hash table to figure out unique keys */ 5567 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5568 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5569 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5570 count = 0; 5571 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5572 for (i=0;i<a->B->cmap->n;i++) { 5573 key = a->garray[i]/dof; 5574 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5575 if (!has) { 5576 mapping[i] = count; 5577 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5578 } else { 5579 /* Current 'i' has the same value the previous step */ 5580 mapping[i] = count-1; 5581 } 5582 } 5583 ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5584 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5585 PetscCheckFalse(htsize!=count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count); 5586 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5587 off = 0; 5588 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5589 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5590 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5591 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5592 /* In case, 
the matrix was already created but users want to recreate the matrix */ 5593 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5594 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5595 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5596 ierr = ISDestroy(&map);CHKERRQ(ierr); 5597 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5598 } else if (reuse == MAT_REUSE_MATRIX) { 5599 /* If matrix was already created, we simply update values using SF objects 5600 * that as attached to the matrix ealier. 5601 */ 5602 const PetscScalar *pd_a,*po_a; 5603 5604 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5605 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5606 PetscCheckFalse(!sf || !osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet"); 5607 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5608 /* Update values in place */ 5609 ierr = MatSeqAIJGetArrayRead(p->A,&pd_a);CHKERRQ(ierr); 5610 ierr = MatSeqAIJGetArrayRead(p->B,&po_a);CHKERRQ(ierr); 5611 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5612 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5613 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5614 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5615 ierr = MatSeqAIJRestoreArrayRead(p->A,&pd_a);CHKERRQ(ierr); 5616 ierr = MatSeqAIJRestoreArrayRead(p->B,&po_a);CHKERRQ(ierr); 5617 } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type"); 5618 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5619 PetscFunctionReturn(0); 5620 } 5621 5622 /*@C 5623 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5624 5625 Collective on Mat 5626 5627 Input Parameters: 5628 + A - the first matrix in mpiaij format 5629 . B - the second matrix in mpiaij format 5630 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5631 5632 Input/Output Parameters: 5633 + rowb - index sets of rows of B to extract (or NULL), modified on output 5634 - colb - index sets of columns of B to extract (or NULL), modified on output 5635 5636 Output Parameter: 5637 . 
B_seq - the sequential matrix generated 5638 5639 Level: developer 5640 5641 @*/ 5642 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5643 { 5644 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5645 PetscErrorCode ierr; 5646 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5647 IS isrowb,iscolb; 5648 Mat *bseq=NULL; 5649 5650 PetscFunctionBegin; 5651 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5652 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5653 } 5654 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5655 5656 if (scall == MAT_INITIAL_MATRIX) { 5657 start = A->cmap->rstart; 5658 cmap = a->garray; 5659 nzA = a->A->cmap->n; 5660 nzB = a->B->cmap->n; 5661 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5662 ncols = 0; 5663 for (i=0; i<nzB; i++) { /* row < local row index */ 5664 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5665 else break; 5666 } 5667 imark = i; 5668 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5669 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5670 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5671 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5672 } else { 5673 PetscCheckFalse(!rowb || !colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5674 isrowb = *rowb; iscolb = *colb; 5675 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5676 bseq[0] = *B_seq; 5677 } 5678 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5679 *B_seq = bseq[0]; 5680 ierr = PetscFree(bseq);CHKERRQ(ierr); 5681 if (!rowb) { 5682 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5683 } else { 5684 *rowb = isrowb; 5685 } 5686 if (!colb) { 5687 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5688 } else { 5689 *colb = iscolb; 5690 } 5691 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5692 PetscFunctionReturn(0); 5693 } 5694 5695 /* 5696 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5697 of the OFF-DIAGONAL portion of local A 5698 5699 Collective on Mat 5700 5701 Input Parameters: 5702 + A,B - the matrices in mpiaij format 5703 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5704 5705 Output Parameter: 5706 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5707 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5708 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5709 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5710 5711 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5712 for this matrix. This is not desirable.. 
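
   A sketch of the intended calling sequence (illustrative only; the caller-side variable names are not taken from this file).
   The first call uses MAT_INITIAL_MATRIX and keeps startsj_s, startsj_r and bufa so that later calls with MAT_REUSE_MATRIX
   only refresh the numerical values of B_oth, which mirrors how MatProductSymbolic_MPIAIJBACKEND() and
   MatProductNumeric_MPIAIJBACKEND() below use this routine:

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa      = NULL;
     Mat        B_oth     = NULL;
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ... numerical values of B change, nonzero pattern stays the same ...
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
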
5713 5714 Level: developer 5715 5716 */ 5717 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5718 { 5719 PetscErrorCode ierr; 5720 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5721 Mat_SeqAIJ *b_oth; 5722 VecScatter ctx; 5723 MPI_Comm comm; 5724 const PetscMPIInt *rprocs,*sprocs; 5725 const PetscInt *srow,*rstarts,*sstarts; 5726 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5727 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5728 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5729 MPI_Request *reqs = NULL,*rwaits = NULL,*swaits = NULL; 5730 PetscMPIInt size,tag,rank,nreqs; 5731 5732 PetscFunctionBegin; 5733 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5734 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 5735 5736 if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) { 5737 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5738 } 5739 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5740 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 5741 5742 if (size == 1) { 5743 startsj_s = NULL; 5744 bufa_ptr = NULL; 5745 *B_oth = NULL; 5746 PetscFunctionReturn(0); 5747 } 5748 5749 ctx = a->Mvctx; 5750 tag = ((PetscObject)ctx)->tag; 5751 5752 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5753 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5754 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5755 ierr = PetscMPIIntCast(nsends+nrecvs,&nreqs);CHKERRQ(ierr); 5756 ierr = PetscMalloc1(nreqs,&reqs);CHKERRQ(ierr); 5757 rwaits = reqs; 5758 swaits = reqs + nrecvs; 5759 5760 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5761 if (scall == MAT_INITIAL_MATRIX) { 5762 /* i-array */ 5763 /*---------*/ 5764 /* post receives */ 5765 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5766 for (i=0; i<nrecvs; i++) { 5767 rowlen = rvalues + rstarts[i]*rbs; 5768 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5769 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5770 } 5771 5772 /* pack the outgoing message */ 5773 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5774 5775 sstartsj[0] = 0; 5776 rstartsj[0] = 0; 5777 len = 0; /* total length of j or a array to be sent */ 5778 if (nsends) { 5779 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5780 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5781 } 5782 for (i=0; i<nsends; i++) { 5783 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5784 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5785 for (j=0; j<nrows; j++) { 5786 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5787 for (l=0; l<sbs; l++) { 5788 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5789 5790 rowlen[j*sbs+l] = ncols; 5791 5792 len 
+= ncols; 5793 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5794 } 5795 k++; 5796 } 5797 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5798 5799 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5800 } 5801 /* recvs and sends of i-array are completed */ 5802 if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5803 ierr = PetscFree(svalues);CHKERRQ(ierr); 5804 5805 /* allocate buffers for sending j and a arrays */ 5806 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5807 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5808 5809 /* create i-array of B_oth */ 5810 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5811 5812 b_othi[0] = 0; 5813 len = 0; /* total length of j or a array to be received */ 5814 k = 0; 5815 for (i=0; i<nrecvs; i++) { 5816 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5817 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5818 for (j=0; j<nrows; j++) { 5819 b_othi[k+1] = b_othi[k] + rowlen[j]; 5820 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5821 k++; 5822 } 5823 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5824 } 5825 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5826 5827 /* allocate space for j and a arrrays of B_oth */ 5828 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5829 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5830 5831 /* j-array */ 5832 /*---------*/ 5833 /* post receives of j-array */ 5834 for (i=0; i<nrecvs; i++) { 5835 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5836 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5837 } 5838 5839 /* pack the outgoing message j-array */ 5840 if (nsends) k = sstarts[0]; 5841 for (i=0; i<nsends; i++) { 5842 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5843 bufJ = bufj+sstartsj[i]; 5844 for (j=0; j<nrows; j++) { 5845 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5846 for (ll=0; ll<sbs; ll++) { 5847 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5848 for (l=0; l<ncols; l++) { 5849 *bufJ++ = cols[l]; 5850 } 5851 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5852 } 5853 } 5854 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5855 } 5856 5857 /* recvs and sends of j-array are completed */ 5858 if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5859 } else if (scall == MAT_REUSE_MATRIX) { 5860 sstartsj = *startsj_s; 5861 rstartsj = *startsj_r; 5862 bufa = *bufa_ptr; 5863 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5864 ierr = MatSeqAIJGetArrayWrite(*B_oth,&b_otha);CHKERRQ(ierr); 5865 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5866 5867 /* a-array */ 5868 /*---------*/ 5869 /* post receives of a-array */ 5870 for (i=0; i<nrecvs; i++) { 5871 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5872 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5873 } 5874 5875 /* pack the outgoing message a-array */ 5876 if (nsends) k = sstarts[0]; 5877 for (i=0; i<nsends; i++) { 5878 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5879 bufA = bufa+sstartsj[i]; 5880 for (j=0; j<nrows; j++) { 5881 row = srow[k++] + B->rmap->range[rank]; /* global row 
idx */ 5882 for (ll=0; ll<sbs; ll++) { 5883 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5884 for (l=0; l<ncols; l++) { 5885 *bufA++ = vals[l]; 5886 } 5887 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5888 } 5889 } 5890 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5891 } 5892 /* recvs and sends of a-array are completed */ 5893 if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5894 ierr = PetscFree(reqs);CHKERRQ(ierr); 5895 5896 if (scall == MAT_INITIAL_MATRIX) { 5897 /* put together the new matrix */ 5898 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5899 5900 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5901 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5902 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5903 b_oth->free_a = PETSC_TRUE; 5904 b_oth->free_ij = PETSC_TRUE; 5905 b_oth->nonew = 0; 5906 5907 ierr = PetscFree(bufj);CHKERRQ(ierr); 5908 if (!startsj_s || !bufa_ptr) { 5909 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5910 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5911 } else { 5912 *startsj_s = sstartsj; 5913 *startsj_r = rstartsj; 5914 *bufa_ptr = bufa; 5915 } 5916 } else if (scall == MAT_REUSE_MATRIX) { 5917 ierr = MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha);CHKERRQ(ierr); 5918 } 5919 5920 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5921 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5922 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5923 PetscFunctionReturn(0); 5924 } 5925 5926 /*@C 5927 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5928 5929 Not Collective 5930 5931 Input Parameter: 5932 . A - The matrix in mpiaij format 5933 5934 Output Parameters: 5935 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5936 . 
colmap - A map from global column index to local index into lvec 5937 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5938 5939 Level: developer 5940 5941 @*/ 5942 #if defined(PETSC_USE_CTABLE) 5943 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5944 #else 5945 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5946 #endif 5947 { 5948 Mat_MPIAIJ *a; 5949 5950 PetscFunctionBegin; 5951 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5952 PetscValidPointer(lvec, 2); 5953 PetscValidPointer(colmap, 3); 5954 PetscValidPointer(multScatter, 4); 5955 a = (Mat_MPIAIJ*) A->data; 5956 if (lvec) *lvec = a->lvec; 5957 if (colmap) *colmap = a->colmap; 5958 if (multScatter) *multScatter = a->Mvctx; 5959 PetscFunctionReturn(0); 5960 } 5961 5962 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5963 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5964 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5965 #if defined(PETSC_HAVE_MKL_SPARSE) 5966 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5967 #endif 5968 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5969 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5970 #if defined(PETSC_HAVE_ELEMENTAL) 5971 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5972 #endif 5973 #if defined(PETSC_HAVE_SCALAPACK) 5974 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 5975 #endif 5976 #if defined(PETSC_HAVE_HYPRE) 5977 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5978 #endif 5979 #if defined(PETSC_HAVE_CUDA) 5980 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 5981 #endif 5982 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 5983 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*); 5984 #endif 5985 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5986 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5987 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 5988 5989 /* 5990 Computes (B'*A')' since computing B*A directly is untenable 5991 5992 n p p 5993 [ ] [ ] [ ] 5994 m [ A ] * n [ B ] = m [ C ] 5995 [ ] [ ] [ ] 5996 5997 */ 5998 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5999 { 6000 PetscErrorCode ierr; 6001 Mat At,Bt,Ct; 6002 6003 PetscFunctionBegin; 6004 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 6005 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 6006 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr); 6007 ierr = MatDestroy(&At);CHKERRQ(ierr); 6008 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 6009 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 6010 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 6011 PetscFunctionReturn(0); 6012 } 6013 6014 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 6015 { 6016 PetscErrorCode ierr; 6017 PetscBool cisdense; 6018 6019 PetscFunctionBegin; 6020 PetscCheckFalse(A->cmap->n != B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n); 6021 ierr = 
MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 6022 ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 6023 ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr); 6024 if (!cisdense) { 6025 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 6026 } 6027 ierr = MatSetUp(C);CHKERRQ(ierr); 6028 6029 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6030 PetscFunctionReturn(0); 6031 } 6032 6033 /* ----------------------------------------------------------------*/ 6034 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6035 { 6036 Mat_Product *product = C->product; 6037 Mat A = product->A,B=product->B; 6038 6039 PetscFunctionBegin; 6040 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 6041 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 6042 6043 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6044 C->ops->productsymbolic = MatProductSymbolic_AB; 6045 PetscFunctionReturn(0); 6046 } 6047 6048 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6049 { 6050 PetscErrorCode ierr; 6051 Mat_Product *product = C->product; 6052 6053 PetscFunctionBegin; 6054 if (product->type == MATPRODUCT_AB) { 6055 ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr); 6056 } 6057 PetscFunctionReturn(0); 6058 } 6059 6060 /* std::upper_bound(): Given a sorted array, return index of the first element in range [first,last) whose value 6061 is greater than value, or last if there is no such element. 6062 */ 6063 static inline PetscErrorCode PetscSortedIntUpperBound(PetscInt *array,PetscCount first,PetscCount last,PetscInt value,PetscCount *upper) 6064 { 6065 PetscCount it,step,count = last - first; 6066 6067 PetscFunctionBegin; 6068 while (count > 0) { 6069 it = first; 6070 step = count / 2; 6071 it += step; 6072 if (!(value < array[it])) { 6073 first = ++it; 6074 count -= step + 1; 6075 } else count = step; 6076 } 6077 *upper = first; 6078 PetscFunctionReturn(0); 6079 } 6080 6081 /* Merge two sets of sorted nonzero entries and return a CSR for the merged (sequential) matrix 6082 6083 Input Parameters: 6084 6085 j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1) 6086 j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2) 6087 6088 mat: both sets' entries are on m rows, where m is the number of local rows of the matrix mat 6089 6090 For Set1, j1[] contains column indices of the nonzeros. 6091 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6092 respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted, 6093 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6094 6095 Similar for Set2. 6096 6097 This routine merges the two sets of nonzeros row by row and removes repeats. 6098 6099 Output Parameters: (memory is allocated by the caller) 6100 6101 i[],j[]: the CSR of the merged matrix, which has m rows. 6102 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to the imap1[k]-th unique nonzero in the merged matrix. 6103 imap2[]: similar to imap1[], but for Set2.
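
   A small worked example (made-up data, taking r = 0 so merged positions start at 0):
     Set1 row 0: j1 = {1,1,4}, i.e. unique columns {1,4} with jmap1 = {0,2,3}
     Set2 row 0: j2 = {2,4},   i.e. unique columns {2,4} with jmap2 = {0,1,2}
   The merged row is j = {1,2,4}, so i[1]-i[0] = 3, and
     imap1 = {0,2} (Set1's unique columns 1 and 4 are the 0th and 2nd unique nonzeros of the merged matrix)
     imap2 = {1,2} (Set2's unique columns 2 and 4 are the 1st and 2nd unique nonzeros of the merged matrix)
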
6104 Note we order nonzeros row-by-row and from left to right. 6105 */ 6106 static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[], 6107 const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[], 6108 PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[]) 6109 { 6110 PetscErrorCode ierr; 6111 PetscInt r,m; /* Row index of mat */ 6112 PetscCount t,t1,t2,b1,e1,b2,e2; 6113 6114 PetscFunctionBegin; 6115 ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr); 6116 t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged matrix, respectively */ 6117 i[0] = 0; 6118 for (r=0; r<m; r++) { /* Do row by row merging */ 6119 b1 = rowBegin1[r]; 6120 e1 = rowEnd1[r]; 6121 b2 = rowBegin2[r]; 6122 e2 = rowEnd2[r]; 6123 while (b1 < e1 && b2 < e2) { 6124 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6125 j[t] = j1[b1]; 6126 imap1[t1] = t; 6127 imap2[t2] = t; 6128 b1 += jmap1[t1+1] - jmap1[t1]; /* Jump to next unique local nonzero */ 6129 b2 += jmap2[t2+1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 6130 t1++; t2++; t++; 6131 } else if (j1[b1] < j2[b2]) { 6132 j[t] = j1[b1]; 6133 imap1[t1] = t; 6134 b1 += jmap1[t1+1] - jmap1[t1]; 6135 t1++; t++; 6136 } else { 6137 j[t] = j2[b2]; 6138 imap2[t2] = t; 6139 b2 += jmap2[t2+1] - jmap2[t2]; 6140 t2++; t++; 6141 } 6142 } 6143 /* Merge the remaining entries in either j1[] or j2[] */ 6144 while (b1 < e1) { 6145 j[t] = j1[b1]; 6146 imap1[t1] = t; 6147 b1 += jmap1[t1+1] - jmap1[t1]; 6148 t1++; t++; 6149 } 6150 while (b2 < e2) { 6151 j[t] = j2[b2]; 6152 imap2[t2] = t; 6153 b2 += jmap2[t2+1] - jmap2[t2]; 6154 t2++; t++; 6155 } 6156 i[r+1] = t; 6157 } 6158 PetscFunctionReturn(0); 6159 } 6160 6161 /* Split a set/group of local entries into two subsets: those in the diagonal block and those in the off-diagonal block 6162 6163 Input Parameters: 6164 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6165 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6166 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6167 6168 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6169 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6170 6171 Output Parameters: 6172 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6173 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6174 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6175 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6176 6177 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6178 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6179 repeats (i.e., same 'i,j' pair). 6180 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t] 6181 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
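
   For illustration (made-up data): if the diagonal-block part of one row, after sorting, has column indices {3,3,7}
   coming from input positions perm = {9,4,6}, then that row appends {9,4,6} to Aperm[], contributes 3 to Atot and
   2 to Annz, and appends the increments 2 and 1 to Ajmap[] (one increment per unique column).
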
6182 6183 Atot: number of entries belonging to the diagonal block 6184 Annz: number of unique nonzeros belonging to the diagonal block. 6185 6186 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6187 6188 Aperm[],Bperm[],Ajmap[],Bjmap[] are allocated by this routine with PetscMalloc4(). One has to free them with PetscFree4() in the exact order. 6189 */ 6190 static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[], 6191 PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[], 6192 PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_, 6193 PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_) 6194 { 6195 PetscErrorCode ierr; 6196 PetscInt cstart,cend,rstart,rend,row,col; 6197 PetscCount Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6198 PetscCount Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6199 PetscCount k,m,p,q,r,s,mid; 6200 PetscCount *Aperm,*Bperm,*Ajmap,*Bjmap; 6201 6202 PetscFunctionBegin; 6203 ierr = PetscLayoutGetRange(mat->rmap,&rstart,&rend);CHKERRQ(ierr); 6204 ierr = PetscLayoutGetRange(mat->cmap,&cstart,&cend);CHKERRQ(ierr); 6205 m = rend - rstart; 6206 6207 for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows */ 6208 6209 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6210 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6211 */ 6212 while (k<n) { 6213 row = i[k]; 6214 /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6215 for (s=k; s<n; s++) if (i[s] != row) break; 6216 for (p=k; p<s; p++) { 6217 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */ 6218 #if defined(PETSC_USE_DEBUG) 6219 else if (j[p] < 0 || j[p] > mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]); 6220 #endif 6221 } 6222 ierr = PetscSortIntWithCountArray(s-k,j+k,perm+k);CHKERRQ(ierr); 6223 ierr = PetscSortedIntUpperBound(j,k,s,-1,&mid);CHKERRQ(ierr); /* Seperate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6224 rowBegin[row-rstart] = k; 6225 rowMid[row-rstart] = mid; 6226 rowEnd[row-rstart] = s; 6227 6228 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6229 Atot += mid - k; 6230 Btot += s - mid; 6231 6232 /* Count unique nonzeros of this diag/offdiag row */ 6233 for (p=k; p<mid;) { 6234 col = j[p]; 6235 do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */ 6236 Annz++; 6237 } 6238 6239 for (p=mid; p<s;) { 6240 col = j[p]; 6241 do {p++;} while (p<s && j[p] == col); 6242 Bnnz++; 6243 } 6244 k = s; 6245 } 6246 6247 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6248 ierr = PetscMalloc4(Atot,&Aperm,Btot,&Bperm,Annz+1,&Ajmap,Bnnz+1,&Bjmap);CHKERRQ(ierr); 6249 6250 /* Re-scan indices and copy diag/offdiag permuation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6251 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6252 for (r=0; r<m; r++) { 6253 k = rowBegin[r]; 6254 mid = rowMid[r]; 6255 s = rowEnd[r]; 6256 ierr = PetscArraycpy(Aperm+Atot,perm+k, mid-k);CHKERRQ(ierr); 6257 ierr = PetscArraycpy(Bperm+Btot,perm+mid,s-mid);CHKERRQ(ierr); 6258 Atot += mid - k; 6259 Btot += s - mid; 6260 6261 /* Scan column indices in this row 
and find out how many repeats each unique nonzero has */ 6262 for (p=k; p<mid;) { 6263 col = j[p]; 6264 q = p; 6265 do {p++;} while (p<mid && j[p] == col); 6266 Ajmap[Annz+1] = Ajmap[Annz] + (p - q); 6267 Annz++; 6268 } 6269 6270 for (p=mid; p<s;) { 6271 col = j[p]; 6272 q = p; 6273 do {p++;} while (p<s && j[p] == col); 6274 Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q); 6275 Bnnz++; 6276 } 6277 } 6278 /* Output */ 6279 *Aperm_ = Aperm; 6280 *Annz_ = Annz; 6281 *Atot_ = Atot; 6282 *Ajmap_ = Ajmap; 6283 *Bperm_ = Bperm; 6284 *Bnnz_ = Bnnz; 6285 *Btot_ = Btot; 6286 *Bjmap_ = Bjmap; 6287 PetscFunctionReturn(0); 6288 } 6289 6290 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, const PetscInt coo_i[], const PetscInt coo_j[]) 6291 { 6292 PetscErrorCode ierr; 6293 MPI_Comm comm; 6294 PetscMPIInt rank,size; 6295 PetscInt m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6296 PetscCount k,p,q,rem; /* Loop variables over coo arrays */ 6297 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 6298 6299 PetscFunctionBegin; 6300 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 6301 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 6302 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 6303 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 6304 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 6305 ierr = PetscLayoutGetRange(mat->rmap,&rstart,&rend);CHKERRQ(ierr); 6306 ierr = PetscLayoutGetRange(mat->cmap,&cstart,&cend);CHKERRQ(ierr); 6307 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 6308 ierr = MatGetSize(mat,&M,&N);CHKERRQ(ierr); 6309 6310 /* ---------------------------------------------------------------------------*/ 6311 /* Sort (i,j) by row along with a permuation array, so that the to-be-ignored */ 6312 /* entries come first, then local rows, then remote rows. */ 6313 /* ---------------------------------------------------------------------------*/ 6314 PetscCount n1 = coo_n,*perm1; 6315 PetscInt *i1,*j1; /* Copies of input COOs along with a permutation array */ 6316 ierr = PetscMalloc3(n1,&i1,n1,&j1,n1,&perm1);CHKERRQ(ierr); 6317 ierr = PetscArraycpy(i1,coo_i,n1);CHKERRQ(ierr); /* Make a copy since we'll modify it */ 6318 ierr = PetscArraycpy(j1,coo_j,n1);CHKERRQ(ierr); 6319 for (k=0; k<n1; k++) perm1[k] = k; 6320 6321 /* Manipulate indices so that entries with negative row or col indices will have smallest 6322 row indices, local entries will have greater but negative row indices, and remote entries 6323 will have positive row indices. 
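
     For example (illustrative values), with rstart = 10 and rend = 20 on this rank:
       an entry with row index -1 (to be ignored)  -> i1[] becomes PETSC_MIN_INT
       an entry with row index 12 (a local row)    -> i1[] becomes 12 - PETSC_MAX_INT, negative but greater than PETSC_MIN_INT
       an entry with row index 35 (a remote row)   -> i1[] stays 35, positive
     so a single sort by i1[] groups the entries as ignored, then local, then remote.
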
6324 */ 6325 for (k=0; k<n1; k++) { 6326 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6327 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6328 else if (mat->nooffprocentries) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6329 else if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6330 } 6331 6332 /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */ 6333 ierr = PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1);CHKERRQ(ierr); 6334 for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */ 6335 ierr = PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem);CHKERRQ(ierr); /* rem is upper bound of the last local row */ 6336 for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6337 6338 /* ---------------------------------------------------------------------------*/ 6339 /* Split local rows into diag/offdiag portions */ 6340 /* ---------------------------------------------------------------------------*/ 6341 PetscCount *rowBegin1,*rowMid1,*rowEnd1; 6342 PetscCount *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1; 6343 PetscCount Annz1,Bnnz1,Atot1,Btot1; 6344 6345 ierr = PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1);CHKERRQ(ierr); 6346 ierr = PetscMalloc1(n1-rem,&Cperm1);CHKERRQ(ierr); 6347 ierr = MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1);CHKERRQ(ierr); 6348 6349 /* ---------------------------------------------------------------------------*/ 6350 /* Send remote rows to their owner */ 6351 /* ---------------------------------------------------------------------------*/ 6352 /* Find which rows should be sent to which remote ranks*/ 6353 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6354 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6355 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6356 const PetscInt *ranges; 6357 PetscInt maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6358 6359 ierr = PetscLayoutGetRanges(mat->rmap,&ranges);CHKERRQ(ierr); 6360 ierr = PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries);CHKERRQ(ierr); 6361 for (k=rem; k<n1;) { 6362 PetscMPIInt owner; 6363 PetscInt firstRow,lastRow; 6364 /* Locate a row range */ 6365 firstRow = i1[k]; /* first row of this owner */ 6366 ierr = PetscLayoutFindOwner(mat->rmap,firstRow,&owner);CHKERRQ(ierr); 6367 lastRow = ranges[owner+1]-1; /* last row of this owner */ 6368 6369 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6370 ierr = PetscSortedIntUpperBound(i1,k,n1,lastRow,&p);CHKERRQ(ierr); 6371 6372 /* All entries in [k,p) belong to this remote owner */ 6373 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6374 PetscMPIInt *sendto2; 6375 PetscInt *nentries2; 6376 PetscInt maxNsend2 = (maxNsend <= size/2) ? 
maxNsend*2 : size; 6377 ierr = PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2);CHKERRQ(ierr); 6378 ierr = PetscArraycpy(sendto2,sendto,maxNsend);CHKERRQ(ierr); 6379 ierr = PetscArraycpy(nentries2,nentries,maxNsend);CHKERRQ(ierr); 6380 ierr = PetscFree2(sendto,nentries);CHKERRQ(ierr); 6381 sendto = sendto2; 6382 nentries = nentries2; 6383 maxNsend = maxNsend2; 6384 } 6385 sendto[nsend] = owner; 6386 nentries[nsend] = p - k; 6387 ierr = PetscCountCast(p-k,&nentries[nsend]);CHKERRQ(ierr); 6388 nsend++; 6389 k = p; 6390 } 6391 6392 /* Build 1st SF to know offsets on remote to send data */ 6393 PetscSF sf1; 6394 PetscInt nroots = 1,nroots2 = 0; 6395 PetscInt nleaves = nsend,nleaves2 = 0; 6396 PetscInt *offsets; 6397 PetscSFNode *iremote; 6398 6399 ierr = PetscSFCreate(comm,&sf1);CHKERRQ(ierr); 6400 ierr = PetscMalloc1(nsend,&iremote);CHKERRQ(ierr); 6401 ierr = PetscMalloc1(nsend,&offsets);CHKERRQ(ierr); 6402 for (k=0; k<nsend; k++) { 6403 iremote[k].rank = sendto[k]; 6404 iremote[k].index = 0; 6405 nleaves2 += nentries[k]; 6406 if (PetscUnlikely(nleaves2 < 0)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt"); 6407 } 6408 ierr = PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 6409 ierr = PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM);CHKERRQ(ierr); 6410 ierr = PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM);CHKERRQ(ierr); /* Should nroots2 overflow, we check offsets[] below */ 6411 ierr = PetscSFDestroy(&sf1);CHKERRQ(ierr); 6412 PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT,nleaves2,n1-rem); 6413 6414 /* Build 2nd SF to send remote COOs to their owner */ 6415 PetscSF sf2; 6416 nroots = nroots2; 6417 nleaves = nleaves2; 6418 ierr = PetscSFCreate(comm,&sf2);CHKERRQ(ierr); 6419 ierr = PetscSFSetFromOptions(sf2);CHKERRQ(ierr); 6420 ierr = PetscMalloc1(nleaves,&iremote);CHKERRQ(ierr); 6421 p = 0; 6422 for (k=0; k<nsend; k++) { 6423 if (PetscUnlikely(offsets[k] < 0)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt"); 6424 for (q=0; q<nentries[k]; q++,p++) { 6425 iremote[p].rank = sendto[k]; 6426 iremote[p].index = offsets[k] + q; 6427 } 6428 } 6429 ierr = PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 6430 6431 /* sf2 only sends contiguous leafdata to contiguous rootdata.
We record the permuation which will be used to fill leafdata */ 6432 ierr = PetscArraycpy(Cperm1,perm1+rem,n1-rem);CHKERRQ(ierr); 6433 6434 /* Send the remote COOs to their owner */ 6435 PetscInt n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6436 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6437 ierr = PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2);CHKERRQ(ierr); 6438 ierr = PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE);CHKERRQ(ierr); 6439 ierr = PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE);CHKERRQ(ierr); 6440 ierr = PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE);CHKERRQ(ierr); 6441 ierr = PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE);CHKERRQ(ierr); 6442 6443 ierr = PetscFree(offsets);CHKERRQ(ierr); 6444 ierr = PetscFree2(sendto,nentries);CHKERRQ(ierr); 6445 6446 /* ---------------------------------------------------------------*/ 6447 /* Sort received COOs by row along with the permutation array */ 6448 /* ---------------------------------------------------------------*/ 6449 for (k=0; k<n2; k++) perm2[k] = k; 6450 ierr = PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2);CHKERRQ(ierr); 6451 6452 /* ---------------------------------------------------------------*/ 6453 /* Split received COOs into diag/offdiag portions */ 6454 /* ---------------------------------------------------------------*/ 6455 PetscCount *rowBegin2,*rowMid2,*rowEnd2; 6456 PetscCount *Ajmap2,*Aperm2,*Bjmap2,*Bperm2; 6457 PetscCount Annz2,Bnnz2,Atot2,Btot2; 6458 6459 ierr = PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2);CHKERRQ(ierr); 6460 ierr = MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2);CHKERRQ(ierr); 6461 6462 /* --------------------------------------------------------------------------*/ 6463 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6464 /* --------------------------------------------------------------------------*/ 6465 PetscInt *Ai,*Bi; 6466 PetscInt *Aj,*Bj; 6467 6468 ierr = PetscMalloc1(m+1,&Ai);CHKERRQ(ierr); 6469 ierr = PetscMalloc1(m+1,&Bi);CHKERRQ(ierr); 6470 ierr = PetscMalloc1(Annz1+Annz2,&Aj);CHKERRQ(ierr); /* Since local and remote entries might have dups, we might allocate excess memory */ 6471 ierr = PetscMalloc1(Bnnz1+Bnnz2,&Bj);CHKERRQ(ierr); 6472 6473 PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2; 6474 ierr = PetscMalloc4(Annz1,&Aimap1,Bnnz1,&Bimap1,Annz2,&Aimap2,Bnnz2,&Bimap2);CHKERRQ(ierr); 6475 6476 ierr = MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj);CHKERRQ(ierr); 6477 ierr = MatMergeEntries_Internal(mat,j1,j2,rowMid1, rowEnd1,rowMid2, rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj);CHKERRQ(ierr); 6478 ierr = PetscFree3(rowBegin1,rowMid1,rowEnd1);CHKERRQ(ierr); 6479 ierr = PetscFree3(rowBegin2,rowMid2,rowEnd2);CHKERRQ(ierr); 6480 ierr = PetscFree3(i1,j1,perm1);CHKERRQ(ierr); 6481 ierr = PetscFree3(i2,j2,perm2);CHKERRQ(ierr); 6482 6483 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6484 PetscInt Annz = Ai[m]; 6485 PetscInt Bnnz = Bi[m]; 6486 if (Annz < Annz1 + Annz2) { 6487 PetscInt *Aj_new; 6488 ierr = PetscMalloc1(Annz,&Aj_new);CHKERRQ(ierr); 6489 ierr = PetscArraycpy(Aj_new,Aj,Annz);CHKERRQ(ierr); 6490 ierr = 
PetscFree(Aj);CHKERRQ(ierr); 6491 Aj = Aj_new; 6492 } 6493 6494 if (Bnnz < Bnnz1 + Bnnz2) { 6495 PetscInt *Bj_new; 6496 ierr = PetscMalloc1(Bnnz,&Bj_new);CHKERRQ(ierr); 6497 ierr = PetscArraycpy(Bj_new,Bj,Bnnz);CHKERRQ(ierr); 6498 ierr = PetscFree(Bj);CHKERRQ(ierr); 6499 Bj = Bj_new; 6500 } 6501 6502 /* --------------------------------------------------------------------------------*/ 6503 /* Create a MPIAIJKOKKOS newmat with CSRs of A and B, then replace mat with newmat */ 6504 /* --------------------------------------------------------------------------------*/ 6505 Mat newmat; 6506 PetscScalar *Aa,*Ba; 6507 Mat_SeqAIJ *a,*b; 6508 6509 ierr = PetscCalloc1(Annz,&Aa);CHKERRQ(ierr); /* Zero matrix on device */ 6510 ierr = PetscCalloc1(Bnnz,&Ba);CHKERRQ(ierr); 6511 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6512 if (cstart) {for (k=0; k<Annz; k++) Aj[k] -= cstart;} 6513 ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,M,N,Ai,Aj,Aa,Bi,Bj,Ba,&newmat);CHKERRQ(ierr); /* FIXME: Can we do it without creating a new mat? */ 6514 ierr = MatHeaderMerge(mat,&newmat);CHKERRQ(ierr); /* Unlike MatHeaderReplace(), some info, ex. mat->product is kept */ 6515 mpiaij = (Mat_MPIAIJ*)mat->data; 6516 a = (Mat_SeqAIJ*)mpiaij->A->data; 6517 b = (Mat_SeqAIJ*)mpiaij->B->data; 6518 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6519 a->free_a = b->free_a = PETSC_TRUE; 6520 a->free_ij = b->free_ij = PETSC_TRUE; 6521 6522 mpiaij->coo_n = coo_n; 6523 mpiaij->coo_sf = sf2; 6524 mpiaij->sendlen = nleaves; 6525 mpiaij->recvlen = nroots; 6526 6527 mpiaij->Annz1 = Annz1; 6528 mpiaij->Annz2 = Annz2; 6529 mpiaij->Bnnz1 = Bnnz1; 6530 mpiaij->Bnnz2 = Bnnz2; 6531 6532 mpiaij->Atot1 = Atot1; 6533 mpiaij->Atot2 = Atot2; 6534 mpiaij->Btot1 = Btot1; 6535 mpiaij->Btot2 = Btot2; 6536 6537 mpiaij->Aimap1 = Aimap1; 6538 mpiaij->Aimap2 = Aimap2; 6539 mpiaij->Bimap1 = Bimap1; 6540 mpiaij->Bimap2 = Bimap2; 6541 6542 mpiaij->Ajmap1 = Ajmap1; 6543 mpiaij->Ajmap2 = Ajmap2; 6544 mpiaij->Bjmap1 = Bjmap1; 6545 mpiaij->Bjmap2 = Bjmap2; 6546 6547 mpiaij->Aperm1 = Aperm1; 6548 mpiaij->Aperm2 = Aperm2; 6549 mpiaij->Bperm1 = Bperm1; 6550 mpiaij->Bperm2 = Bperm2; 6551 6552 mpiaij->Cperm1 = Cperm1; 6553 6554 /* Allocate in preallocation. 
If not used, it has zero cost on host */ 6555 ierr = PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf);CHKERRQ(ierr); 6556 PetscFunctionReturn(0); 6557 } 6558 6559 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode) 6560 { 6561 PetscErrorCode ierr; 6562 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 6563 Mat A = mpiaij->A,B = mpiaij->B; 6564 PetscCount Annz1 = mpiaij->Annz1,Annz2 = mpiaij->Annz2,Bnnz1 = mpiaij->Bnnz1,Bnnz2 = mpiaij->Bnnz2; 6565 PetscScalar *Aa,*Ba; 6566 PetscScalar *sendbuf = mpiaij->sendbuf; 6567 PetscScalar *recvbuf = mpiaij->recvbuf; 6568 const PetscCount *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap1 = mpiaij->Aimap1,*Aimap2 = mpiaij->Aimap2; 6569 const PetscCount *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap1 = mpiaij->Bimap1,*Bimap2 = mpiaij->Bimap2; 6570 const PetscCount *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2; 6571 const PetscCount *Cperm1 = mpiaij->Cperm1; 6572 6573 PetscFunctionBegin; 6574 ierr = MatSeqAIJGetArray(A,&Aa);CHKERRQ(ierr); /* Might read and write matrix values */ 6575 ierr = MatSeqAIJGetArray(B,&Ba);CHKERRQ(ierr); 6576 if (imode == INSERT_VALUES) { 6577 ierr = PetscMemzero(Aa,((Mat_SeqAIJ*)A->data)->nz*sizeof(PetscScalar));CHKERRQ(ierr); 6578 ierr = PetscMemzero(Ba,((Mat_SeqAIJ*)B->data)->nz*sizeof(PetscScalar));CHKERRQ(ierr); 6579 } 6580 6581 /* Pack entries to be sent to remote */ 6582 for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6583 6584 /* Send remote entries to their owner and overlap the communication with local computation */ 6585 ierr = PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE);CHKERRQ(ierr); 6586 /* Add local entries to A and B */ 6587 for (PetscCount i=0; i<Annz1; i++) { 6588 for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) Aa[Aimap1[i]] += v[Aperm1[k]]; 6589 } 6590 for (PetscCount i=0; i<Bnnz1; i++) { 6591 for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) Ba[Bimap1[i]] += v[Bperm1[k]]; 6592 } 6593 ierr = PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE);CHKERRQ(ierr); 6594 6595 /* Add received remote entries to A and B */ 6596 for (PetscCount i=0; i<Annz2; i++) { 6597 for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6598 } 6599 for (PetscCount i=0; i<Bnnz2; i++) { 6600 for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6601 } 6602 ierr = MatSeqAIJRestoreArray(A,&Aa);CHKERRQ(ierr); 6603 ierr = MatSeqAIJRestoreArray(B,&Ba);CHKERRQ(ierr); 6604 PetscFunctionReturn(0); 6605 } 6606 6607 /* ----------------------------------------------------------------*/ 6608 6609 /*MC 6610 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6611 6612 Options Database Keys: 6613 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 6614 6615 Level: beginner 6616 6617 Notes: 6618 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values, 6619 in this case the values associated with the rows and columns one passes in are set to zero 6620 in the matrix 6621 6622 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. 
In this no 6623 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 6624 6625 .seealso: MatCreateAIJ() 6626 M*/ 6627 6628 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6629 { 6630 Mat_MPIAIJ *b; 6631 PetscErrorCode ierr; 6632 PetscMPIInt size; 6633 6634 PetscFunctionBegin; 6635 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 6636 6637 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 6638 B->data = (void*)b; 6639 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 6640 B->assembled = PETSC_FALSE; 6641 B->insertmode = NOT_SET_VALUES; 6642 b->size = size; 6643 6644 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr); 6645 6646 /* build cache for off array entries formed */ 6647 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 6648 6649 b->donotstash = PETSC_FALSE; 6650 b->colmap = NULL; 6651 b->garray = NULL; 6652 b->roworiented = PETSC_TRUE; 6653 6654 /* stuff used for matrix vector multiply */ 6655 b->lvec = NULL; 6656 b->Mvctx = NULL; 6657 6658 /* stuff for MatGetRow() */ 6659 b->rowindices = NULL; 6660 b->rowvalues = NULL; 6661 b->getrowactive = PETSC_FALSE; 6662 6663 /* flexible pointer used in CUSPARSE classes */ 6664 b->spptr = NULL; 6665 6666 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 6667 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 6668 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 6669 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 6670 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 6671 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 6672 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 6673 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 6674 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 6675 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr); 6676 #if defined(PETSC_HAVE_CUDA) 6677 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr); 6678 #endif 6679 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6680 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr); 6681 #endif 6682 #if defined(PETSC_HAVE_MKL_SPARSE) 6683 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 6684 #endif 6685 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 6686 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr); 6687 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 6688 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);CHKERRQ(ierr); 6689 #if defined(PETSC_HAVE_ELEMENTAL) 6690 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 6691 #endif 6692 #if defined(PETSC_HAVE_SCALAPACK) 6693 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr); 6694 #endif 6695 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 6696 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 6697 #if defined(PETSC_HAVE_HYPRE) 6698 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 6699 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr); 6700 #endif 6701 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr); 6702 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr); 6703 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ);CHKERRQ(ierr); 6704 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ);CHKERRQ(ierr); 6705 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 6706 PetscFunctionReturn(0); 6707 } 6708 6709 /*@C 6710 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 6711 and "off-diagonal" part of the matrix in CSR format. 6712 6713 Collective 6714 6715 Input Parameters: 6716 + comm - MPI communicator 6717 . m - number of local rows (Cannot be PETSC_DECIDE) 6718 . n - This value should be the same as the local size used in creating the 6719 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 6720 calculated if N is given) For square matrices n is almost always m. 6721 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 6722 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 6723 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6724 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6725 . a - matrix values 6726 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6727 . oj - column indices, which must be global, representing global columns in the MPIAIJ matrix 6728 - oa - matrix values 6729 6730 Output Parameter: 6731 . mat - the matrix 6732 6733 Level: advanced 6734 6735 Notes: 6736 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6737 must free the arrays once the matrix has been destroyed and not before. 
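
   As a small illustration (made-up numbers), consider a rank that owns rows 0-1 and global columns 0-1 of a 2x4 matrix,
   with diagonal-block entries (0,0)=1.0 and (1,1)=2.0 and one off-diagonal entry (0,3)=3.0. One would pass
   i = {0,1,2}, j = {0,1} (local column indices), a = {1.0,2.0} for the diagonal part and
   oi = {0,1,1}, oj = {3} (global column index), oa = {3.0} for the off-diagonal part.
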
6738 6739 The i and j indices are 0 based 6740 6741 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6742 6743 This sets local rows and cannot be used to set off-processor values. 6744 6745 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6746 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6747 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6748 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6749 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6750 communication if it is known that only local entries will be set. 6751 6752 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 6753 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 6754 @*/ 6755 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 6756 { 6757 PetscErrorCode ierr; 6758 Mat_MPIAIJ *maij; 6759 6760 PetscFunctionBegin; 6761 PetscCheckFalse(m < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6762 PetscCheckFalse(i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 6763 PetscCheckFalse(oi[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 6764 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 6765 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 6766 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 6767 maij = (Mat_MPIAIJ*) (*mat)->data; 6768 6769 (*mat)->preallocated = PETSC_TRUE; 6770 6771 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 6772 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 6773 6774 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 6775 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 6776 6777 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 6778 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6779 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6780 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 6781 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6782 PetscFunctionReturn(0); 6783 } 6784 6785 typedef struct { 6786 Mat *mp; /* intermediate products */ 6787 PetscBool *mptmp; /* is the intermediate product temporary ? */ 6788 PetscInt cp; /* number of intermediate products */ 6789 6790 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6791 PetscInt *startsj_s,*startsj_r; 6792 PetscScalar *bufa; 6793 Mat P_oth; 6794 6795 /* may take advantage of merging product->B */ 6796 Mat Bloc; /* B-local by merging diag and off-diag */ 6797 6798 /* cusparse does not have support to split between symbolic and numeric phases. 
6799 When api_user is true, we don't need to update the numerical values 6800 of the temporary storage */ 6801 PetscBool reusesym; 6802 6803 /* support for COO values insertion */ 6804 PetscScalar *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6805 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6806 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6807 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */ 6808 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6809 PetscMemType mtype; 6810 6811 /* customization */ 6812 PetscBool abmerge; 6813 PetscBool P_oth_bind; 6814 } MatMatMPIAIJBACKEND; 6815 6816 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6817 { 6818 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data; 6819 PetscInt i; 6820 PetscErrorCode ierr; 6821 6822 PetscFunctionBegin; 6823 ierr = PetscFree2(mmdata->startsj_s,mmdata->startsj_r);CHKERRQ(ierr); 6824 ierr = PetscFree(mmdata->bufa);CHKERRQ(ierr); 6825 ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v);CHKERRQ(ierr); 6826 ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w);CHKERRQ(ierr); 6827 ierr = MatDestroy(&mmdata->P_oth);CHKERRQ(ierr); 6828 ierr = MatDestroy(&mmdata->Bloc);CHKERRQ(ierr); 6829 ierr = PetscSFDestroy(&mmdata->sf);CHKERRQ(ierr); 6830 for (i = 0; i < mmdata->cp; i++) { 6831 ierr = MatDestroy(&mmdata->mp[i]);CHKERRQ(ierr); 6832 } 6833 ierr = PetscFree2(mmdata->mp,mmdata->mptmp);CHKERRQ(ierr); 6834 ierr = PetscFree(mmdata->own[0]);CHKERRQ(ierr); 6835 ierr = PetscFree(mmdata->own);CHKERRQ(ierr); 6836 ierr = PetscFree(mmdata->off[0]);CHKERRQ(ierr); 6837 ierr = PetscFree(mmdata->off);CHKERRQ(ierr); 6838 ierr = PetscFree(mmdata);CHKERRQ(ierr); 6839 PetscFunctionReturn(0); 6840 } 6841 6842 /* Copy selected n entries with indices in idx[] of A to v[]. 
6843 If idx is NULL, copy the whole data array of A to v[] 6844 */ 6845 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 6846 { 6847 PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]); 6848 PetscErrorCode ierr; 6849 6850 PetscFunctionBegin; 6851 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f);CHKERRQ(ierr); 6852 if (f) { 6853 ierr = (*f)(A,n,idx,v);CHKERRQ(ierr); 6854 } else { 6855 const PetscScalar *vv; 6856 6857 ierr = MatSeqAIJGetArrayRead(A,&vv);CHKERRQ(ierr); 6858 if (n && idx) { 6859 PetscScalar *w = v; 6860 const PetscInt *oi = idx; 6861 PetscInt j; 6862 6863 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 6864 } else { 6865 ierr = PetscArraycpy(v,vv,n);CHKERRQ(ierr); 6866 } 6867 ierr = MatSeqAIJRestoreArrayRead(A,&vv);CHKERRQ(ierr); 6868 } 6869 PetscFunctionReturn(0); 6870 } 6871 6872 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 6873 { 6874 MatMatMPIAIJBACKEND *mmdata; 6875 PetscInt i,n_d,n_o; 6876 PetscErrorCode ierr; 6877 6878 PetscFunctionBegin; 6879 MatCheckProduct(C,1); 6880 PetscCheckFalse(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 6881 mmdata = (MatMatMPIAIJBACKEND*)C->product->data; 6882 if (!mmdata->reusesym) { /* update temporary matrices */ 6883 if (mmdata->P_oth) { 6884 ierr = MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 6885 } 6886 if (mmdata->Bloc) { 6887 ierr = MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc);CHKERRQ(ierr); 6888 } 6889 } 6890 mmdata->reusesym = PETSC_FALSE; 6891 6892 for (i = 0; i < mmdata->cp; i++) { 6893 PetscCheckFalse(!mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]); 6894 ierr = (*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]);CHKERRQ(ierr); 6895 } 6896 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 6897 PetscInt noff = mmdata->off[i+1] - mmdata->off[i]; 6898 6899 if (mmdata->mptmp[i]) continue; 6900 if (noff) { 6901 PetscInt nown = mmdata->own[i+1] - mmdata->own[i]; 6902 6903 ierr = MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o);CHKERRQ(ierr); 6904 ierr = MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d);CHKERRQ(ierr); 6905 n_o += noff; 6906 n_d += nown; 6907 } else { 6908 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data; 6909 6910 ierr = MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d);CHKERRQ(ierr); 6911 n_d += mm->nz; 6912 } 6913 } 6914 if (mmdata->hasoffproc) { /* offprocess insertion */ 6915 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr); 6916 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr); 6917 } 6918 ierr = MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES);CHKERRQ(ierr); 6919 PetscFunctionReturn(0); 6920 } 6921 6922 /* Support for Pt * A, A * P, or Pt * A * P */ 6923 #define MAX_NUMBER_INTERMEDIATE 4 6924 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 6925 { 6926 Mat_Product *product = C->product; 6927 Mat A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 6928 Mat_MPIAIJ *a,*p; 6929 MatMatMPIAIJBACKEND *mmdata; 6930 ISLocalToGlobalMapping P_oth_l2g = NULL; 6931 IS glob = NULL; 6932 const char *prefix; 6933 char 
pprefix[256]; 6934 const PetscInt *globidx,*P_oth_idx; 6935 PetscInt i,j,cp,m,n,M,N,*coo_i,*coo_j; 6936 PetscCount ncoo,ncoo_d,ncoo_o,ncoo_oown; 6937 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */ 6938 /* type-0: consecutive, start from 0; type-1: consecutive with */ 6939 /* a base offset; type-2: sparse with a local to global map table */ 6940 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 6941 6942 MatProductType ptype; 6943 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk; 6944 PetscMPIInt size; 6945 PetscErrorCode ierr; 6946 6947 PetscFunctionBegin; 6948 MatCheckProduct(C,1); 6949 PetscCheckFalse(product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 6950 ptype = product->type; 6951 if (product->A->symmetric && ptype == MATPRODUCT_AtB) { 6952 ptype = MATPRODUCT_AB; 6953 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 6954 } 6955 switch (ptype) { 6956 case MATPRODUCT_AB: 6957 A = product->A; 6958 P = product->B; 6959 m = A->rmap->n; 6960 n = P->cmap->n; 6961 M = A->rmap->N; 6962 N = P->cmap->N; 6963 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 6964 break; 6965 case MATPRODUCT_AtB: 6966 P = product->A; 6967 A = product->B; 6968 m = P->cmap->n; 6969 n = A->cmap->n; 6970 M = P->cmap->N; 6971 N = A->cmap->N; 6972 hasoffproc = PETSC_TRUE; 6973 break; 6974 case MATPRODUCT_PtAP: 6975 A = product->A; 6976 P = product->B; 6977 m = P->cmap->n; 6978 n = P->cmap->n; 6979 M = P->cmap->N; 6980 N = P->cmap->N; 6981 hasoffproc = PETSC_TRUE; 6982 break; 6983 default: 6984 SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 6985 } 6986 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)C),&size);CHKERRMPI(ierr); 6987 if (size == 1) hasoffproc = PETSC_FALSE; 6988 6989 /* defaults */ 6990 for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) { 6991 mp[i] = NULL; 6992 mptmp[i] = PETSC_FALSE; 6993 rmapt[i] = -1; 6994 cmapt[i] = -1; 6995 rmapa[i] = NULL; 6996 cmapa[i] = NULL; 6997 } 6998 6999 /* customization */ 7000 ierr = PetscNew(&mmdata);CHKERRQ(ierr); 7001 mmdata->reusesym = product->api_user; 7002 if (ptype == MATPRODUCT_AB) { 7003 if (product->api_user) { 7004 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr); 7005 ierr = PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr); 7006 ierr = PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 7007 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7008 } else { 7009 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr); 7010 ierr = PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr); 7011 ierr = PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 7012 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7013 } 7014 } else if (ptype == MATPRODUCT_PtAP) { 7015 if (product->api_user) { 7016 ierr = 
PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr); 7017 ierr = PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 7018 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7019 } else { 7020 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr); 7021 ierr = PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 7022 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7023 } 7024 } 7025 a = (Mat_MPIAIJ*)A->data; 7026 p = (Mat_MPIAIJ*)P->data; 7027 ierr = MatSetSizes(C,m,n,M,N);CHKERRQ(ierr); 7028 ierr = PetscLayoutSetUp(C->rmap);CHKERRQ(ierr); 7029 ierr = PetscLayoutSetUp(C->cmap);CHKERRQ(ierr); 7030 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 7031 ierr = MatGetOptionsPrefix(C,&prefix);CHKERRQ(ierr); 7032 7033 cp = 0; 7034 switch (ptype) { 7035 case MATPRODUCT_AB: /* A * P */ 7036 ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 7037 7038 /* A_diag * P_local (merged or not) */ 7039 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7040 /* P is product->B */ 7041 ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 7042 ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 7043 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 7044 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 7045 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr); 7046 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 7047 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 7048 mp[cp]->product->api_user = product->api_user; 7049 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 7050 PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7051 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 7052 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 7053 rmapt[cp] = 1; 7054 cmapt[cp] = 2; 7055 cmapa[cp] = globidx; 7056 mptmp[cp] = PETSC_FALSE; 7057 cp++; 7058 } else { /* A_diag * P_diag and A_diag * P_off */ 7059 ierr = MatProductCreate(a->A,p->A,NULL,&mp[cp]);CHKERRQ(ierr); 7060 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 7061 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 7062 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr); 7063 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 7064 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 7065 mp[cp]->product->api_user = product->api_user; 7066 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 7067 PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7068 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 7069 rmapt[cp] = 1; 7070 cmapt[cp] = 1; 7071 mptmp[cp] = PETSC_FALSE; 7072 cp++; 7073 ierr = MatProductCreate(a->A,p->B,NULL,&mp[cp]);CHKERRQ(ierr); 7074 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 7075 ierr = 
MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 7076 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr); 7077 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 7078 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 7079 mp[cp]->product->api_user = product->api_user; 7080 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 7081 PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7082 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 7083 rmapt[cp] = 1; 7084 cmapt[cp] = 2; 7085 cmapa[cp] = p->garray; 7086 mptmp[cp] = PETSC_FALSE; 7087 cp++; 7088 } 7089 7090 /* A_off * P_other */ 7091 if (mmdata->P_oth) { 7092 ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); /* make P_oth use local col ids */ 7093 ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 7094 ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr); 7095 ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr); 7096 ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr); 7097 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 7098 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 7099 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr); 7100 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 7101 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 7102 mp[cp]->product->api_user = product->api_user; 7103 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 7104 PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7105 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 7106 rmapt[cp] = 1; 7107 cmapt[cp] = 2; 7108 cmapa[cp] = P_oth_idx; 7109 mptmp[cp] = PETSC_FALSE; 7110 cp++; 7111 } 7112 break; 7113 7114 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7115 /* A is product->B */ 7116 ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 7117 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7118 ierr = MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 7119 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 7120 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 7121 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr); 7122 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 7123 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 7124 mp[cp]->product->api_user = product->api_user; 7125 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 7126 PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7127 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 7128 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 7129 rmapt[cp] = 2; 7130 rmapa[cp] = globidx; 7131 cmapt[cp] = 2; 7132 cmapa[cp] = globidx; 7133 mptmp[cp] = PETSC_FALSE; 7134 cp++; 7135 } else { 7136 ierr = MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 7137 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 7138 ierr = 
MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 7139 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr); 7140 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 7141 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 7142 mp[cp]->product->api_user = product->api_user; 7143 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 7144 PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7145 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 7146 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 7147 rmapt[cp] = 1; 7148 cmapt[cp] = 2; 7149 cmapa[cp] = globidx; 7150 mptmp[cp] = PETSC_FALSE; 7151 cp++; 7152 ierr = MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 7153 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 7154 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 7155 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr); 7156 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 7157 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 7158 mp[cp]->product->api_user = product->api_user; 7159 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 7160 PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7161 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 7162 rmapt[cp] = 2; 7163 rmapa[cp] = p->garray; 7164 cmapt[cp] = 2; 7165 cmapa[cp] = globidx; 7166 mptmp[cp] = PETSC_FALSE; 7167 cp++; 7168 } 7169 break; 7170 case MATPRODUCT_PtAP: 7171 ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 7172 /* P is product->B */ 7173 ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 7174 ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 7175 ierr = MatProductSetType(mp[cp],MATPRODUCT_PtAP);CHKERRQ(ierr); 7176 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 7177 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr); 7178 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 7179 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 7180 mp[cp]->product->api_user = product->api_user; 7181 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 7182 PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7183 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 7184 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 7185 rmapt[cp] = 2; 7186 rmapa[cp] = globidx; 7187 cmapt[cp] = 2; 7188 cmapa[cp] = globidx; 7189 mptmp[cp] = PETSC_FALSE; 7190 cp++; 7191 if (mmdata->P_oth) { 7192 ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); 7193 ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 7194 ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr); 7195 ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr); 7196 ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr); 7197 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 7198 ierr = 
MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 7199 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr); 7200 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 7201 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 7202 mp[cp]->product->api_user = product->api_user; 7203 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 7204 PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7205 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 7206 mptmp[cp] = PETSC_TRUE; 7207 cp++; 7208 ierr = MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]);CHKERRQ(ierr); 7209 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 7210 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 7211 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr); 7212 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 7213 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 7214 mp[cp]->product->api_user = product->api_user; 7215 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 7216 PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7217 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 7218 rmapt[cp] = 2; 7219 rmapa[cp] = globidx; 7220 cmapt[cp] = 2; 7221 cmapa[cp] = P_oth_idx; 7222 mptmp[cp] = PETSC_FALSE; 7223 cp++; 7224 } 7225 break; 7226 default: 7227 SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 7228 } 7229 /* sanity check */ 7230 if (size > 1) for (i = 0; i < cp; i++) PetscCheckFalse(rmapt[i] == 2 && !hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i); 7231 7232 ierr = PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp);CHKERRQ(ierr); 7233 for (i = 0; i < cp; i++) { 7234 mmdata->mp[i] = mp[i]; 7235 mmdata->mptmp[i] = mptmp[i]; 7236 } 7237 mmdata->cp = cp; 7238 C->product->data = mmdata; 7239 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7240 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7241 7242 /* memory type */ 7243 mmdata->mtype = PETSC_MEMTYPE_HOST; 7244 ierr = PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");CHKERRQ(ierr); 7245 ierr = PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"");CHKERRQ(ierr); 7246 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7247 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 7248 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_DEVICE; 7249 #endif 7250 7251 /* prepare coo coordinates for values insertion */ 7252 7253 /* count total nonzeros of those intermediate seqaij Mats 7254 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7255 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7256 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7257 */ 7258 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7259 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7260 if (mptmp[cp]) continue; 7261 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7262 const PetscInt *rmap = rmapa[cp]; 7263 const PetscInt 
mr = mp[cp]->rmap->n; 7264 const PetscInt rs = C->rmap->rstart; 7265 const PetscInt re = C->rmap->rend; 7266 const PetscInt *ii = mm->i; 7267 for (i = 0; i < mr; i++) { 7268 const PetscInt gr = rmap[i]; 7269 const PetscInt nz = ii[i+1] - ii[i]; 7270 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7271 else ncoo_oown += nz; /* this row is local */ 7272 } 7273 } else ncoo_d += mm->nz; 7274 } 7275 7276 /* 7277 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7278 7279 ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted to me by other procs. 7280 7281 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7282 7283 off[p]: points to the segment for matrix mp[p], storing locations of nonzeros that mp[p] will insert to others 7284 own[p]: points to the segment for matrix mp[p], storing locations of nonzeros that mp[p] will insert locally 7285 Thus, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7286 7287 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7288 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive. 7289 */ 7290 ierr = PetscCalloc1(mmdata->cp+1,&mmdata->off);CHKERRQ(ierr); /* +1 to make a csr-like data structure */ 7291 ierr = PetscCalloc1(mmdata->cp+1,&mmdata->own);CHKERRQ(ierr); 7292 7293 /* gather (i,j) of nonzeros inserted by remote procs */ 7294 if (hasoffproc) { 7295 PetscSF msf; 7296 PetscInt ncoo2,*coo_i2,*coo_j2; 7297 7298 ierr = PetscMalloc1(ncoo_o,&mmdata->off[0]);CHKERRQ(ierr); 7299 ierr = PetscMalloc1(ncoo_oown,&mmdata->own[0]);CHKERRQ(ierr); 7300 ierr = PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j);CHKERRQ(ierr); /* to collect (i,j) of entries to be sent to others */ 7301 7302 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7303 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7304 PetscInt *idxoff = mmdata->off[cp]; 7305 PetscInt *idxown = mmdata->own[cp]; 7306 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7307 const PetscInt *rmap = rmapa[cp]; 7308 const PetscInt *cmap = cmapa[cp]; 7309 const PetscInt *ii = mm->i; 7310 PetscInt *coi = coo_i + ncoo_o; 7311 PetscInt *coj = coo_j + ncoo_o; 7312 const PetscInt mr = mp[cp]->rmap->n; 7313 const PetscInt rs = C->rmap->rstart; 7314 const PetscInt re = C->rmap->rend; 7315 const PetscInt cs = C->cmap->rstart; 7316 for (i = 0; i < mr; i++) { 7317 const PetscInt *jj = mm->j + ii[i]; 7318 const PetscInt gr = rmap[i]; 7319 const PetscInt nz = ii[i+1] - ii[i]; 7320 if (gr < rs || gr >= re) { /* this is an offproc row */ 7321 for (j = ii[i]; j < ii[i+1]; j++) { 7322 *coi++ = gr; 7323 *idxoff++ = j; 7324 } 7325 if (!cmapt[cp]) { /* already global */ 7326 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7327 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7328 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7329 } else { /* local to global for sparse columns */ 7330 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7331 } 7332 ncoo_o += nz; 7333 } else { /* this is a local row */ 7334 for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j; 7335 } 7336 } 7337 } 7338 mmdata->off[cp + 1] = idxoff; 7339 mmdata->own[cp + 1] = idxown; 7340 } 7341 7342 ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr); 7343 ierr = PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i);CHKERRQ(ierr); 7344 ierr =
PetscSFGetMultiSF(mmdata->sf,&msf);CHKERRQ(ierr); 7345 ierr = PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL);CHKERRQ(ierr); 7346 ncoo = ncoo_d + ncoo_oown + ncoo2; 7347 ierr = PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2);CHKERRQ(ierr); 7348 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); /* put (i,j) of remote nonzeros at back */ 7349 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 7350 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 7351 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 7352 ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr); 7353 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7354 ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w);CHKERRQ(ierr); 7355 coo_i = coo_i2; 7356 coo_j = coo_j2; 7357 } else { /* no offproc values insertion */ 7358 ncoo = ncoo_d; 7359 ierr = PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j);CHKERRQ(ierr); 7360 7361 ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr); 7362 ierr = PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER);CHKERRQ(ierr); 7363 ierr = PetscSFSetUp(mmdata->sf);CHKERRQ(ierr); 7364 } 7365 mmdata->hasoffproc = hasoffproc; 7366 7367 /* gather (i,j) of nonzeros inserted locally */ 7368 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7369 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7370 PetscInt *coi = coo_i + ncoo_d; 7371 PetscInt *coj = coo_j + ncoo_d; 7372 const PetscInt *jj = mm->j; 7373 const PetscInt *ii = mm->i; 7374 const PetscInt *cmap = cmapa[cp]; 7375 const PetscInt *rmap = rmapa[cp]; 7376 const PetscInt mr = mp[cp]->rmap->n; 7377 const PetscInt rs = C->rmap->rstart; 7378 const PetscInt re = C->rmap->rend; 7379 const PetscInt cs = C->cmap->rstart; 7380 7381 if (mptmp[cp]) continue; 7382 if (rmapt[cp] == 1) { /* consecutive rows */ 7383 /* fill coo_i */ 7384 for (i = 0; i < mr; i++) { 7385 const PetscInt gr = i + rs; 7386 for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr; 7387 } 7388 /* fill coo_j */ 7389 if (!cmapt[cp]) { /* type-0, already global */ 7390 ierr = PetscArraycpy(coj,jj,mm->nz);CHKERRQ(ierr); 7391 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7392 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7393 } else { /* type-2, local to global for sparse columns */ 7394 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7395 } 7396 ncoo_d += mm->nz; 7397 } else if (rmapt[cp] == 2) { /* sparse rows */ 7398 for (i = 0; i < mr; i++) { 7399 const PetscInt *jj = mm->j + ii[i]; 7400 const PetscInt gr = rmap[i]; 7401 const PetscInt nz = ii[i+1] - ii[i]; 7402 if (gr >= rs && gr < re) { /* local rows */ 7403 for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr; 7404 if (!cmapt[cp]) { /* type-0, already global */ 7405 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7406 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7407 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7408 } else { /* type-2, local to global for sparse columns */ 7409 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7410 } 7411 ncoo_d += nz; 7412 } 7413 } 7414 } 7415 } 7416 if (glob) { 7417 ierr = ISRestoreIndices(glob,&globidx);CHKERRQ(ierr); 7418 } 7419 ierr = ISDestroy(&glob);CHKERRQ(ierr); 7420 if (P_oth_l2g) { 7421 ierr = 
ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 7422 } 7423 ierr = ISLocalToGlobalMappingDestroy(&P_oth_l2g);CHKERRQ(ierr); 7424 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7425 ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v);CHKERRQ(ierr); 7426 7427 /* preallocate with COO data */ 7428 ierr = MatSetPreallocationCOO(C,ncoo,coo_i,coo_j);CHKERRQ(ierr); 7429 ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr); 7430 PetscFunctionReturn(0); 7431 } 7432 7433 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7434 { 7435 Mat_Product *product = mat->product; 7436 PetscErrorCode ierr; 7437 #if defined(PETSC_HAVE_DEVICE) 7438 PetscBool match = PETSC_FALSE; 7439 PetscBool usecpu = PETSC_FALSE; 7440 #else 7441 PetscBool match = PETSC_TRUE; 7442 #endif 7443 7444 PetscFunctionBegin; 7445 MatCheckProduct(mat,1); 7446 #if defined(PETSC_HAVE_DEVICE) 7447 if (!product->A->boundtocpu && !product->B->boundtocpu) { 7448 ierr = PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match);CHKERRQ(ierr); 7449 } 7450 if (match) { /* we can always fallback to the CPU if requested */ 7451 switch (product->type) { 7452 case MATPRODUCT_AB: 7453 if (product->api_user) { 7454 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");CHKERRQ(ierr); 7455 ierr = PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7456 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7457 } else { 7458 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr); 7459 ierr = PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7460 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7461 } 7462 break; 7463 case MATPRODUCT_AtB: 7464 if (product->api_user) { 7465 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr); 7466 ierr = PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7467 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7468 } else { 7469 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr); 7470 ierr = PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7471 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7472 } 7473 break; 7474 case MATPRODUCT_PtAP: 7475 if (product->api_user) { 7476 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");CHKERRQ(ierr); 7477 ierr = PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7478 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7479 } else { 7480 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr); 7481 ierr = PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7482 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7483 } 7484 break; 7485 default: 7486 break; 7487 } 7488 match = (PetscBool)!usecpu; 7489 } 7490 #endif 7491 if (match) { 7492 switch (product->type) { 7493 case MATPRODUCT_AB: 7494 case MATPRODUCT_AtB: 7495 case 
MATPRODUCT_PtAP: 7496 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7497 break; 7498 default: 7499 break; 7500 } 7501 } 7502 /* fallback to MPIAIJ ops */ 7503 if (!mat->ops->productsymbolic) { 7504 ierr = MatProductSetFromOptions_MPIAIJ(mat);CHKERRQ(ierr); 7505 } 7506 PetscFunctionReturn(0); 7507 } 7508 7509 /* 7510 Special version for direct calls from Fortran 7511 */ 7512 #include <petsc/private/fortranimpl.h> 7513 7514 /* Change these macros so can be used in void function */ 7515 /* Identical to CHKERRV, except it assigns to *_ierr */ 7516 #undef CHKERRQ 7517 #define CHKERRQ(ierr) do { \ 7518 PetscErrorCode ierr_msv_mpiaij = (ierr); \ 7519 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 7520 *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \ 7521 return; \ 7522 } \ 7523 } while (0) 7524 7525 #undef SETERRQ 7526 #define SETERRQ(comm,ierr,...) do { \ 7527 *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \ 7528 return; \ 7529 } while (0) 7530 7531 #if defined(PETSC_HAVE_FORTRAN_CAPS) 7532 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 7533 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 7534 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 7535 #else 7536 #endif 7537 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 7538 { 7539 Mat mat = *mmat; 7540 PetscInt m = *mm, n = *mn; 7541 InsertMode addv = *maddv; 7542 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 7543 PetscScalar value; 7544 PetscErrorCode ierr; 7545 7546 MatCheckPreallocated(mat,1); 7547 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 7548 else PetscCheckFalse(mat->insertmode != addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 7549 { 7550 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 7551 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 7552 PetscBool roworiented = aij->roworiented; 7553 7554 /* Some Variables required in the macro */ 7555 Mat A = aij->A; 7556 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 7557 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 7558 MatScalar *aa; 7559 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 7560 Mat B = aij->B; 7561 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 7562 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 7563 MatScalar *ba; 7564 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 7565 * cannot use "#if defined" inside a macro. 
*/ 7566 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 7567 7568 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 7569 PetscInt nonew = a->nonew; 7570 MatScalar *ap1,*ap2; 7571 7572 PetscFunctionBegin; 7573 ierr = MatSeqAIJGetArray(A,&aa);CHKERRQ(ierr); 7574 ierr = MatSeqAIJGetArray(B,&ba);CHKERRQ(ierr); 7575 for (i=0; i<m; i++) { 7576 if (im[i] < 0) continue; 7577 PetscAssertFalse(im[i] >= mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 7578 if (im[i] >= rstart && im[i] < rend) { 7579 row = im[i] - rstart; 7580 lastcol1 = -1; 7581 rp1 = aj + ai[row]; 7582 ap1 = aa + ai[row]; 7583 rmax1 = aimax[row]; 7584 nrow1 = ailen[row]; 7585 low1 = 0; 7586 high1 = nrow1; 7587 lastcol2 = -1; 7588 rp2 = bj + bi[row]; 7589 ap2 = ba + bi[row]; 7590 rmax2 = bimax[row]; 7591 nrow2 = bilen[row]; 7592 low2 = 0; 7593 high2 = nrow2; 7594 7595 for (j=0; j<n; j++) { 7596 if (roworiented) value = v[i*n+j]; 7597 else value = v[i+j*m]; 7598 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 7599 if (in[j] >= cstart && in[j] < cend) { 7600 col = in[j] - cstart; 7601 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 7602 } else if (in[j] < 0) continue; 7603 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 7604 /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */ 7605 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 7606 } else { 7607 if (mat->was_assembled) { 7608 if (!aij->colmap) { 7609 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 7610 } 7611 #if defined(PETSC_USE_CTABLE) 7612 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 7613 col--; 7614 #else 7615 col = aij->colmap[in[j]] - 1; 7616 #endif 7617 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 7618 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 7619 col = in[j]; 7620 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 7621 B = aij->B; 7622 b = (Mat_SeqAIJ*)B->data; 7623 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 7624 rp2 = bj + bi[row]; 7625 ap2 = ba + bi[row]; 7626 rmax2 = bimax[row]; 7627 nrow2 = bilen[row]; 7628 low2 = 0; 7629 high2 = nrow2; 7630 bm = aij->B->rmap->n; 7631 ba = b->a; 7632 inserted = PETSC_FALSE; 7633 } 7634 } else col = in[j]; 7635 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 7636 } 7637 } 7638 } else if (!aij->donotstash) { 7639 if (roworiented) { 7640 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 7641 } else { 7642 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 7643 } 7644 } 7645 } 7646 ierr = MatSeqAIJRestoreArray(A,&aa);CHKERRQ(ierr); 7647 ierr = MatSeqAIJRestoreArray(B,&ba);CHKERRQ(ierr); 7648 } 7649 PetscFunctionReturnVoid(); 7650 } 7651 /* Undefining these here since they were redefined from their original definition above! No 7652 * other PETSc functions should be defined past this point, as it is impossible to recover the 7653 * original definitions */ 7654 #undef CHKERRQ 7655 #undef SETERRQ 7656
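/*
   Illustrative usage (a hedged sketch, not part of PETSc): the backend above reduces a parallel MatProduct
   to a set of sequential products whose entries are scattered into C through the COO assembly API. The same
   split between a one-time pattern setup and repeatable value insertion is available directly to users.
   The matrix name X, the 2x2 sizes, and the values below are illustrative assumptions only.

     Mat            X;
     PetscInt       coo_i[3] = {0,0,1};      // global row indices of the nonzeros
     PetscInt       coo_j[3] = {0,1,1};      // global column indices of the nonzeros
     PetscScalar    vals[3]  = {2.0,-1.0,3.0};
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&X);CHKERRQ(ierr);
     ierr = MatSetSizes(X,PETSC_DECIDE,PETSC_DECIDE,2,2);CHKERRQ(ierr);
     ierr = MatSetType(X,MATAIJ);CHKERRQ(ierr);
     ierr = MatSetPreallocationCOO(X,3,coo_i,coo_j);CHKERRQ(ierr);   // pattern phase: fix the nonzero layout once
     ierr = MatSetValuesCOO(X,vals,INSERT_VALUES);CHKERRQ(ierr);     // value phase: may be repeated with new values
     ierr = MatDestroy(&X);CHKERRQ(ierr);

   MatProductSymbolic_MPIAIJBACKEND() plays the role of MatSetPreallocationCOO() for the product matrix C,
   while each call to MatProductNumeric_MPIAIJBACKEND() ends in MatSetValuesCOO(), for example when the
   product is recomputed via MatPtAP() with MAT_REUSE_MATRIX on the AIJ device subclasses.
*/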