#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes:
   Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/
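/*
   A minimal usage sketch (not part of this file) following the advice above to call both
   preallocation routines so the same code works on one or many MPI ranks. The names comm, M, N,
   d_nz and o_nz are placeholders, and error checking is omitted for brevity:

     Mat A;
     MatCreate(comm,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
     MatSetType(A,MATAIJ);
     MatSetFromOptions(A);
     MatSeqAIJSetPreallocation(A,d_nz,NULL);            // used when comm has a single rank
     MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);  // used when comm has multiple ranks
     // ... MatSetValues(A,1,&row,1,&col,&val,INSERT_VALUES); for each entry ...
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/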
/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) {
    ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
  }
  if (a->B) {
    ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
  }

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from that of the parent matrix.
   */
  if (a->lvec) {
    ierr = VecBindToCPU(a->lvec,flg);CHKERRQ(ierr);
  }
  if (a->diag) {
    ierr = VecBindToCPU(a->diag,flg);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb,*aav,*bav;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia   = a->i;
  ib   = b->i;
  ierr = MatSeqAIJGetArrayRead(mat->A,&aav);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(mat->B,&bav);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = bav + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRMPI(ierr);
  if (!n0rows) {
    ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)Y->data;
  PetscBool      cong;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
  if (Y->assembled && cong) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat
A,PetscInt type,PetscReal *reductions) 191 { 192 PetscErrorCode ierr; 193 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 194 PetscInt i,m,n,*garray = aij->garray; 195 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 196 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 197 PetscReal *work; 198 const PetscScalar *dummy; 199 200 PetscFunctionBegin; 201 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 202 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 203 ierr = MatSeqAIJGetArrayRead(aij->A,&dummy);CHKERRQ(ierr); 204 ierr = MatSeqAIJRestoreArrayRead(aij->A,&dummy);CHKERRQ(ierr); 205 ierr = MatSeqAIJGetArrayRead(aij->B,&dummy);CHKERRQ(ierr); 206 ierr = MatSeqAIJRestoreArrayRead(aij->B,&dummy);CHKERRQ(ierr); 207 if (type == NORM_2) { 208 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 209 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 210 } 211 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 212 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 213 } 214 } else if (type == NORM_1) { 215 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 216 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 217 } 218 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 219 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 220 } 221 } else if (type == NORM_INFINITY) { 222 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 223 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 224 } 225 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 226 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 227 } 228 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 229 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 230 work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 231 } 232 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 233 work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 234 } 235 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 236 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 237 work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 238 } 239 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 240 work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 241 } 242 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type"); 243 if (type == NORM_INFINITY) { 244 ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 245 } else { 246 ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 247 } 248 ierr = PetscFree(work);CHKERRQ(ierr); 249 if (type == NORM_2) { 250 for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 251 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 252 for (i=0; i<n; i++) reductions[i] /= m; 253 } 254 PetscFunctionReturn(0); 255 } 256 257 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 258 { 259 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 260 IS sis,gis; 261 PetscErrorCode ierr; 262 const PetscInt *isis,*igis; 263 PetscInt n,*iis,nsis,ngis,rstart,i; 264 265 PetscFunctionBegin; 266 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 267 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 268 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 269 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 270 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 271 ierr = 
ISGetIndices(gis,&igis);CHKERRQ(ierr); 272 273 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 274 ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr); 275 ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr); 276 n = ngis + nsis; 277 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 278 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 279 for (i=0; i<n; i++) iis[i] += rstart; 280 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 281 282 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 283 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 284 ierr = ISDestroy(&sis);CHKERRQ(ierr); 285 ierr = ISDestroy(&gis);CHKERRQ(ierr); 286 PetscFunctionReturn(0); 287 } 288 289 /* 290 Local utility routine that creates a mapping from the global column 291 number to the local number in the off-diagonal part of the local 292 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 293 a slightly higher hash table cost; without it it is not scalable (each processor 294 has an order N integer array but is fast to access. 295 */ 296 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 297 { 298 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 299 PetscErrorCode ierr; 300 PetscInt n = aij->B->cmap->n,i; 301 302 PetscFunctionBegin; 303 PetscCheckFalse(n && !aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 304 #if defined(PETSC_USE_CTABLE) 305 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 306 for (i=0; i<n; i++) { 307 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 308 } 309 #else 310 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 311 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 312 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 313 #endif 314 PetscFunctionReturn(0); 315 } 316 317 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 318 { \ 319 if (col <= lastcol1) low1 = 0; \ 320 else high1 = nrow1; \ 321 lastcol1 = col;\ 322 while (high1-low1 > 5) { \ 323 t = (low1+high1)/2; \ 324 if (rp1[t] > col) high1 = t; \ 325 else low1 = t; \ 326 } \ 327 for (_i=low1; _i<high1; _i++) { \ 328 if (rp1[_i] > col) break; \ 329 if (rp1[_i] == col) { \ 330 if (addv == ADD_VALUES) { \ 331 ap1[_i] += value; \ 332 /* Not sure LogFlops will slow dow the code or not */ \ 333 (void)PetscLogFlops(1.0); \ 334 } \ 335 else ap1[_i] = value; \ 336 goto a_noinsert; \ 337 } \ 338 } \ 339 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 340 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 341 PetscCheckFalse(nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 342 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 343 N = nrow1++ - 1; a->nz++; high1++; \ 344 /* shift up all the later entries in this row */ \ 345 ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\ 346 ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\ 347 rp1[_i] = col; \ 348 ap1[_i] = value; \ 349 A->nonzerostate++;\ 350 a_noinsert: ; \ 351 ailen[row] = nrow1; \ 352 } 353 354 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 355 { \ 356 if (col <= lastcol2) low2 = 0; \ 357 else high2 = nrow2; \ 358 lastcol2 = col; \ 359 while (high2-low2 > 5) { \ 
360 t = (low2+high2)/2; \ 361 if (rp2[t] > col) high2 = t; \ 362 else low2 = t; \ 363 } \ 364 for (_i=low2; _i<high2; _i++) { \ 365 if (rp2[_i] > col) break; \ 366 if (rp2[_i] == col) { \ 367 if (addv == ADD_VALUES) { \ 368 ap2[_i] += value; \ 369 (void)PetscLogFlops(1.0); \ 370 } \ 371 else ap2[_i] = value; \ 372 goto b_noinsert; \ 373 } \ 374 } \ 375 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 376 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 377 PetscCheckFalse(nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 378 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 379 N = nrow2++ - 1; b->nz++; high2++; \ 380 /* shift up all the later entries in this row */ \ 381 ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\ 382 ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\ 383 rp2[_i] = col; \ 384 ap2[_i] = value; \ 385 B->nonzerostate++; \ 386 b_noinsert: ; \ 387 bilen[row] = nrow2; \ 388 } 389 390 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 391 { 392 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 393 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 394 PetscErrorCode ierr; 395 PetscInt l,*garray = mat->garray,diag; 396 PetscScalar *aa,*ba; 397 398 PetscFunctionBegin; 399 /* code only works for square matrices A */ 400 401 /* find size of row to the left of the diagonal part */ 402 ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr); 403 row = row - diag; 404 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 405 if (garray[b->j[b->i[row]+l]] > diag) break; 406 } 407 if (l) { 408 ierr = MatSeqAIJGetArray(mat->B,&ba);CHKERRQ(ierr); 409 ierr = PetscArraycpy(ba+b->i[row],v,l);CHKERRQ(ierr); 410 ierr = MatSeqAIJRestoreArray(mat->B,&ba);CHKERRQ(ierr); 411 } 412 413 /* diagonal part */ 414 if (a->i[row+1]-a->i[row]) { 415 ierr = MatSeqAIJGetArray(mat->A,&aa);CHKERRQ(ierr); 416 ierr = PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr); 417 ierr = MatSeqAIJRestoreArray(mat->A,&aa);CHKERRQ(ierr); 418 } 419 420 /* right of diagonal part */ 421 if (b->i[row+1]-b->i[row]-l) { 422 ierr = MatSeqAIJGetArray(mat->B,&ba);CHKERRQ(ierr); 423 ierr = PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr); 424 ierr = MatSeqAIJRestoreArray(mat->B,&ba);CHKERRQ(ierr); 425 } 426 PetscFunctionReturn(0); 427 } 428 429 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 430 { 431 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 432 PetscScalar value = 0.0; 433 PetscErrorCode ierr; 434 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 435 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 436 PetscBool roworiented = aij->roworiented; 437 438 /* Some Variables required in the macro */ 439 Mat A = aij->A; 440 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 441 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 442 PetscBool ignorezeroentries = a->ignorezeroentries; 443 Mat B = aij->B; 444 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 445 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 446 MatScalar *aa,*ba; 447 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 448 
PetscInt nonew; 449 MatScalar *ap1,*ap2; 450 451 PetscFunctionBegin; 452 ierr = MatSeqAIJGetArray(A,&aa);CHKERRQ(ierr); 453 ierr = MatSeqAIJGetArray(B,&ba);CHKERRQ(ierr); 454 for (i=0; i<m; i++) { 455 if (im[i] < 0) continue; 456 PetscCheckFalse(im[i] >= mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 457 if (im[i] >= rstart && im[i] < rend) { 458 row = im[i] - rstart; 459 lastcol1 = -1; 460 rp1 = aj + ai[row]; 461 ap1 = aa + ai[row]; 462 rmax1 = aimax[row]; 463 nrow1 = ailen[row]; 464 low1 = 0; 465 high1 = nrow1; 466 lastcol2 = -1; 467 rp2 = bj + bi[row]; 468 ap2 = ba + bi[row]; 469 rmax2 = bimax[row]; 470 nrow2 = bilen[row]; 471 low2 = 0; 472 high2 = nrow2; 473 474 for (j=0; j<n; j++) { 475 if (v) value = roworiented ? v[i*n+j] : v[i+j*m]; 476 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 477 if (in[j] >= cstart && in[j] < cend) { 478 col = in[j] - cstart; 479 nonew = a->nonew; 480 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 481 } else if (in[j] < 0) continue; 482 else PetscCheckFalse(in[j] >= mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1); 483 else { 484 if (mat->was_assembled) { 485 if (!aij->colmap) { 486 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 487 } 488 #if defined(PETSC_USE_CTABLE) 489 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); /* map global col ids to local ones */ 490 col--; 491 #else 492 col = aij->colmap[in[j]] - 1; 493 #endif 494 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */ 495 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); /* Change aij->B from reduced/local format to expanded/global format */ 496 col = in[j]; 497 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 498 B = aij->B; 499 b = (Mat_SeqAIJ*)B->data; 500 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 501 rp2 = bj + bi[row]; 502 ap2 = ba + bi[row]; 503 rmax2 = bimax[row]; 504 nrow2 = bilen[row]; 505 low2 = 0; 506 high2 = nrow2; 507 bm = aij->B->rmap->n; 508 ba = b->a; 509 } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 510 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 511 ierr = PetscInfo(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 512 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 513 } 514 } else col = in[j]; 515 nonew = b->nonew; 516 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 517 } 518 } 519 } else { 520 PetscCheckFalse(mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 521 if (!aij->donotstash) { 522 mat->assembled = PETSC_FALSE; 523 if (roworiented) { 524 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 525 } else { 526 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 527 } 528 } 529 } 530 } 531 ierr = MatSeqAIJRestoreArray(A,&aa);CHKERRQ(ierr); 532 
  ierr = MatSeqAIJRestoreArray(B,&ba);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij    = (Mat_MPIAIJ*)mat->data;
  Mat        A       = aij->A; /* diagonal part of the matrix */
  Mat        B       = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a      = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b      = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart  = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen  = a->ilen,*aj = a->j;
  PetscInt   *bilen  = b->ilen,*bj = b->j;
  PetscInt   am      = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz;
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all nonzero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If the column lies in the diagonal block */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be correct and the more complex MatSetValues_MPIAIJ() would have to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ*)mat->data;
  Mat        A      = aij->A; /* diagonal part of the matrix */
  Mat        B      = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *aijd  = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b     = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am     = aij->A->rmap->n,j;
  PetscInt   *full_diag_i = aijd->i,*full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point.
*/ 596 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 597 PetscScalar *aa = a->a,*ba = b->a; 598 599 PetscFunctionBegin; 600 /* Iterate over all rows of the matrix */ 601 for (j=0; j<am; j++) { 602 dnz_row = onz_row = 0; 603 rowstart_offd = full_offd_i[j]; 604 rowstart_diag = full_diag_i[j]; 605 /* Iterate over all non-zero columns of the current row */ 606 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 607 /* If column is in the diagonal */ 608 if (mat_j[col] >= cstart && mat_j[col] < cend) { 609 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 610 aa[rowstart_diag+dnz_row] = mat_a[col]; 611 dnz_row++; 612 } else { /* off-diagonal entries */ 613 bj[rowstart_offd+onz_row] = mat_j[col]; 614 ba[rowstart_offd+onz_row] = mat_a[col]; 615 onz_row++; 616 } 617 } 618 ailen[j] = dnz_row; 619 bilen[j] = onz_row; 620 } 621 PetscFunctionReturn(0); 622 } 623 624 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 625 { 626 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 627 PetscErrorCode ierr; 628 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 629 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 630 631 PetscFunctionBegin; 632 for (i=0; i<m; i++) { 633 if (idxm[i] < 0) continue; /* negative row */ 634 PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1); 635 if (idxm[i] >= rstart && idxm[i] < rend) { 636 row = idxm[i] - rstart; 637 for (j=0; j<n; j++) { 638 if (idxn[j] < 0) continue; /* negative column */ 639 PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1); 640 if (idxn[j] >= cstart && idxn[j] < cend) { 641 col = idxn[j] - cstart; 642 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 643 } else { 644 if (!aij->colmap) { 645 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 646 } 647 #if defined(PETSC_USE_CTABLE) 648 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 649 col--; 650 #else 651 col = aij->colmap[idxn[j]] - 1; 652 #endif 653 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 654 else { 655 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 656 } 657 } 658 } 659 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 660 } 661 PetscFunctionReturn(0); 662 } 663 664 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 665 { 666 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 667 PetscErrorCode ierr; 668 PetscInt nstash,reallocs; 669 670 PetscFunctionBegin; 671 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 672 673 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 674 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 675 ierr = PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 676 PetscFunctionReturn(0); 677 } 678 679 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 680 { 681 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 682 PetscErrorCode ierr; 683 PetscMPIInt n; 684 PetscInt i,j,rstart,ncols,flg; 685 PetscInt *row,*col; 686 PetscBool other_disassembled; 687 PetscScalar *val; 688 689 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 690 691 PetscFunctionBegin; 692 if 
(!aij->donotstash && !mat->nooffprocentries) { 693 while (1) { 694 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 695 if (!flg) break; 696 697 for (i=0; i<n;) { 698 /* Now identify the consecutive vals belonging to the same row */ 699 for (j=i,rstart=row[j]; j<n; j++) { 700 if (row[j] != rstart) break; 701 } 702 if (j < n) ncols = j-i; 703 else ncols = n-i; 704 /* Now assemble all these values with a single function call */ 705 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 706 i = j; 707 } 708 } 709 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 710 } 711 #if defined(PETSC_HAVE_DEVICE) 712 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 713 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 714 if (mat->boundtocpu) { 715 ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr); 716 ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr); 717 } 718 #endif 719 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 720 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 721 722 /* determine if any processor has disassembled, if so we must 723 also disassemble ourself, in order that we may reassemble. */ 724 /* 725 if nonzero structure of submatrix B cannot change then we know that 726 no processor disassembled thus we can skip this stuff 727 */ 728 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 729 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 730 if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globaly it does not */ 731 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 732 } 733 } 734 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 735 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 736 } 737 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 738 #if defined(PETSC_HAVE_DEVICE) 739 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 740 #endif 741 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 742 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 743 744 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 745 746 aij->rowvalues = NULL; 747 748 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 749 750 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 751 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 752 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 753 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 754 } 755 #if defined(PETSC_HAVE_DEVICE) 756 mat->offloadmask = PETSC_OFFLOAD_BOTH; 757 #endif 758 PetscFunctionReturn(0); 759 } 760 761 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 762 { 763 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 764 PetscErrorCode ierr; 765 766 PetscFunctionBegin; 767 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 768 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 769 PetscFunctionReturn(0); 770 } 771 772 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 773 { 774 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 775 PetscObjectState sA, sB; 776 PetscInt 
*lrows; 777 PetscInt r, len; 778 PetscBool cong, lch, gch; 779 PetscErrorCode ierr; 780 781 PetscFunctionBegin; 782 /* get locally owned rows */ 783 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 784 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 785 /* fix right hand side if needed */ 786 if (x && b) { 787 const PetscScalar *xx; 788 PetscScalar *bb; 789 790 PetscCheckFalse(!cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 791 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 792 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 793 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 794 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 795 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 796 } 797 798 sA = mat->A->nonzerostate; 799 sB = mat->B->nonzerostate; 800 801 if (diag != 0.0 && cong) { 802 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 803 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 804 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 805 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 806 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 807 PetscInt nnwA, nnwB; 808 PetscBool nnzA, nnzB; 809 810 nnwA = aijA->nonew; 811 nnwB = aijB->nonew; 812 nnzA = aijA->keepnonzeropattern; 813 nnzB = aijB->keepnonzeropattern; 814 if (!nnzA) { 815 ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr); 816 aijA->nonew = 0; 817 } 818 if (!nnzB) { 819 ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr); 820 aijB->nonew = 0; 821 } 822 /* Must zero here before the next loop */ 823 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 824 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 825 for (r = 0; r < len; ++r) { 826 const PetscInt row = lrows[r] + A->rmap->rstart; 827 if (row >= A->cmap->N) continue; 828 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 829 } 830 aijA->nonew = nnwA; 831 aijB->nonew = nnwB; 832 } else { 833 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 834 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 835 } 836 ierr = PetscFree(lrows);CHKERRQ(ierr); 837 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 838 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 839 840 /* reduce nonzerostate */ 841 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 842 ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 843 if (gch) A->nonzerostate++; 844 PetscFunctionReturn(0); 845 } 846 847 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 848 { 849 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 850 PetscErrorCode ierr; 851 PetscMPIInt n = A->rmap->n; 852 PetscInt i,j,r,m,len = 0; 853 PetscInt *lrows,*owners = A->rmap->range; 854 PetscMPIInt p = 0; 855 PetscSFNode *rrows; 856 PetscSF sf; 857 const PetscScalar *xx; 858 PetscScalar *bb,*mask,*aij_a; 859 Vec xmask,lmask; 860 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 861 const PetscInt *aj, *ii,*ridx; 862 PetscScalar *aa; 863 864 PetscFunctionBegin; 865 /* Create SF where leaves are input rows and roots are owned 
rows */ 866 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 867 for (r = 0; r < n; ++r) lrows[r] = -1; 868 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 869 for (r = 0; r < N; ++r) { 870 const PetscInt idx = rows[r]; 871 PetscCheckFalse(idx < 0 || A->rmap->N <= idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N); 872 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 873 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 874 } 875 rrows[r].rank = p; 876 rrows[r].index = rows[r] - owners[p]; 877 } 878 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 879 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 880 /* Collect flags for rows to be zeroed */ 881 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 882 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 883 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 884 /* Compress and put in row numbers */ 885 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 886 /* zero diagonal part of matrix */ 887 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 888 /* handle off diagonal part of matrix */ 889 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 890 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 891 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 892 for (i=0; i<len; i++) bb[lrows[i]] = 1; 893 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 894 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 895 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 896 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 897 if (x && b) { /* this code is buggy when the row and column layout don't match */ 898 PetscBool cong; 899 900 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 901 PetscCheckFalse(!cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 902 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 903 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 904 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 905 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 906 } 907 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 908 /* remove zeroed rows of off diagonal matrix */ 909 ierr = MatSeqAIJGetArray(l->B,&aij_a);CHKERRQ(ierr); 910 ii = aij->i; 911 for (i=0; i<len; i++) { 912 ierr = PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr); 913 } 914 /* loop over all elements of off process part of matrix zeroing removed columns*/ 915 if (aij->compressedrow.use) { 916 m = aij->compressedrow.nrows; 917 ii = aij->compressedrow.i; 918 ridx = aij->compressedrow.rindex; 919 for (i=0; i<m; i++) { 920 n = ii[i+1] - ii[i]; 921 aj = aij->j + ii[i]; 922 aa = aij_a + ii[i]; 923 924 for (j=0; j<n; j++) { 925 if (PetscAbsScalar(mask[*aj])) { 926 if (b) bb[*ridx] -= *aa*xx[*aj]; 927 *aa = 0.0; 928 } 929 aa++; 930 aj++; 931 } 932 ridx++; 933 } 934 } else { /* do not use compressed row format */ 935 m = l->B->rmap->n; 936 for (i=0; i<m; i++) { 937 n = ii[i+1] - ii[i]; 938 aj = aij->j + ii[i]; 939 aa = aij_a + ii[i]; 940 for (j=0; j<n; j++) { 941 if (PetscAbsScalar(mask[*aj])) { 942 if (b) bb[i] -= *aa*xx[*aj]; 943 *aa = 0.0; 944 } 945 aa++; 946 aj++; 947 } 948 } 949 } 950 if (x && b) { 951 ierr = 
VecRestoreArray(b,&bb);CHKERRQ(ierr); 952 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 953 } 954 ierr = MatSeqAIJRestoreArray(l->B,&aij_a);CHKERRQ(ierr); 955 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 956 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 957 ierr = PetscFree(lrows);CHKERRQ(ierr); 958 959 /* only change matrix nonzero state if pattern was allowed to be changed */ 960 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 961 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 962 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 963 } 964 PetscFunctionReturn(0); 965 } 966 967 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 968 { 969 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 970 PetscErrorCode ierr; 971 PetscInt nt; 972 VecScatter Mvctx = a->Mvctx; 973 974 PetscFunctionBegin; 975 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 976 PetscCheckFalse(nt != A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt); 977 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 978 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 979 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 980 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 981 PetscFunctionReturn(0); 982 } 983 984 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 985 { 986 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 987 PetscErrorCode ierr; 988 989 PetscFunctionBegin; 990 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 991 PetscFunctionReturn(0); 992 } 993 994 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 995 { 996 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 997 PetscErrorCode ierr; 998 VecScatter Mvctx = a->Mvctx; 999 1000 PetscFunctionBegin; 1001 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1002 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1003 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1004 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1005 PetscFunctionReturn(0); 1006 } 1007 1008 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1009 { 1010 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1011 PetscErrorCode ierr; 1012 1013 PetscFunctionBegin; 1014 /* do nondiagonal part */ 1015 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1016 /* do local part */ 1017 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1018 /* add partial results together */ 1019 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1020 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1021 PetscFunctionReturn(0); 1022 } 1023 1024 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1025 { 1026 MPI_Comm comm; 1027 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1028 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1029 IS Me,Notme; 1030 PetscErrorCode ierr; 1031 PetscInt M,N,first,last,*notme,i; 1032 PetscBool lf; 1033 PetscMPIInt size; 1034 1035 PetscFunctionBegin; 1036 /* Easy test: symmetric diagonal block */ 1037 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1038 ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 1039 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRMPI(ierr); 1040 if (!*f) 
PetscFunctionReturn(0); 1041 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1042 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 1043 if (size == 1) PetscFunctionReturn(0); 1044 1045 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1046 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1047 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1048 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1049 for (i=0; i<first; i++) notme[i] = i; 1050 for (i=last; i<M; i++) notme[i-last+first] = i; 1051 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1052 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1053 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1054 Aoff = Aoffs[0]; 1055 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1056 Boff = Boffs[0]; 1057 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1058 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1059 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1060 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1061 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1062 ierr = PetscFree(notme);CHKERRQ(ierr); 1063 PetscFunctionReturn(0); 1064 } 1065 1066 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1067 { 1068 PetscErrorCode ierr; 1069 1070 PetscFunctionBegin; 1071 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1072 PetscFunctionReturn(0); 1073 } 1074 1075 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1076 { 1077 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1078 PetscErrorCode ierr; 1079 1080 PetscFunctionBegin; 1081 /* do nondiagonal part */ 1082 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1083 /* do local part */ 1084 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1085 /* add partial results together */ 1086 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1087 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1088 PetscFunctionReturn(0); 1089 } 1090 1091 /* 1092 This only works correctly for square matrices where the subblock A->A is the 1093 diagonal block 1094 */ 1095 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1096 { 1097 PetscErrorCode ierr; 1098 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1099 1100 PetscFunctionBegin; 1101 PetscCheckFalse(A->rmap->N != A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1102 PetscCheckFalse(A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1103 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1104 PetscFunctionReturn(0); 1105 } 1106 1107 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1108 { 1109 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1110 PetscErrorCode ierr; 1111 1112 PetscFunctionBegin; 1113 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1114 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1115 PetscFunctionReturn(0); 1116 } 1117 1118 /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */ 1119 PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat) 1120 { 1121 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1122 PetscErrorCode ierr; 1123 1124 PetscFunctionBegin; 1125 ierr = PetscSFDestroy(&aij->coo_sf);CHKERRQ(ierr); 1126 ierr = 
PetscFree4(aij->Aperm1,aij->Bperm1,aij->Ajmap1,aij->Bjmap1);CHKERRQ(ierr); 1127 ierr = PetscFree4(aij->Aperm2,aij->Bperm2,aij->Ajmap2,aij->Bjmap2);CHKERRQ(ierr); 1128 ierr = PetscFree4(aij->Aimap1,aij->Bimap1,aij->Aimap2,aij->Bimap2);CHKERRQ(ierr); 1129 ierr = PetscFree2(aij->sendbuf,aij->recvbuf);CHKERRQ(ierr); 1130 ierr = PetscFree(aij->Cperm1);CHKERRQ(ierr); 1131 PetscFunctionReturn(0); 1132 } 1133 1134 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1135 { 1136 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1137 PetscErrorCode ierr; 1138 1139 PetscFunctionBegin; 1140 #if defined(PETSC_USE_LOG) 1141 PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N); 1142 #endif 1143 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1144 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1145 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1146 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1147 #if defined(PETSC_USE_CTABLE) 1148 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1149 #else 1150 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1151 #endif 1152 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1153 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1154 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1155 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1156 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1157 1158 /* Free COO */ 1159 ierr = MatResetPreallocationCOO_MPIAIJ(mat);CHKERRQ(ierr); 1160 1161 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1162 1163 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 1164 ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr); 1165 1166 ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr); 1167 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1168 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1169 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1170 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1171 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1172 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1173 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1174 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr); 1175 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1176 #if defined(PETSC_HAVE_CUDA) 1177 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr); 1178 #endif 1179 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 1180 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr); 1181 #endif 1182 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 1183 #if defined(PETSC_HAVE_ELEMENTAL) 1184 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1185 #endif 1186 #if defined(PETSC_HAVE_SCALAPACK) 1187 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr); 1188 #endif 1189 #if defined(PETSC_HAVE_HYPRE) 1190 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1191 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1192 #endif 1193 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1194 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr); 1195 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1196 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr); 1197 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr); 1198 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr); 1199 #if defined(PETSC_HAVE_MKL_SPARSE) 1200 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr); 1201 #endif 1202 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr); 1203 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1204 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr); 1205 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL);CHKERRQ(ierr); 1206 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL);CHKERRQ(ierr); 1207 PetscFunctionReturn(0); 1208 } 1209 1210 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1211 { 1212 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1213 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1214 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1215 const PetscInt *garray = aij->garray; 1216 const PetscScalar *aa,*ba; 1217 PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb; 1218 PetscInt *rowlens; 1219 PetscInt *colidxs; 1220 PetscScalar *matvals; 1221 PetscErrorCode ierr; 1222 1223 PetscFunctionBegin; 1224 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1225 1226 M = mat->rmap->N; 1227 N = mat->cmap->N; 1228 m = mat->rmap->n; 1229 rs = mat->rmap->rstart; 1230 cs = mat->cmap->rstart; 1231 nz = A->nz + B->nz; 1232 1233 /* write matrix header */ 1234 header[0] = MAT_FILE_CLASSID; 1235 header[1] = M; header[2] = N; header[3] = nz; 1236 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1237 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1238 1239 /* fill in and store row lengths */ 1240 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1241 for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1242 ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr); 1243 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1244 1245 /* fill in and store column indices */ 1246 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1247 for (cnt=0, i=0; i<m; i++) { 1248 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1249 if (garray[B->j[jb]] > cs) break; 1250 colidxs[cnt++] = garray[B->j[jb]]; 1251 } 1252 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1253 colidxs[cnt++] = A->j[ja] + cs; 1254 for (; jb<B->i[i+1]; jb++) 1255 colidxs[cnt++] = garray[B->j[jb]]; 1256 } 1257 PetscCheckFalse(cnt != nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1258 ierr = 
PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 1259 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1260 1261 /* fill in and store nonzero values */ 1262 ierr = MatSeqAIJGetArrayRead(aij->A,&aa);CHKERRQ(ierr); 1263 ierr = MatSeqAIJGetArrayRead(aij->B,&ba);CHKERRQ(ierr); 1264 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1265 for (cnt=0, i=0; i<m; i++) { 1266 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1267 if (garray[B->j[jb]] > cs) break; 1268 matvals[cnt++] = ba[jb]; 1269 } 1270 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1271 matvals[cnt++] = aa[ja]; 1272 for (; jb<B->i[i+1]; jb++) 1273 matvals[cnt++] = ba[jb]; 1274 } 1275 ierr = MatSeqAIJRestoreArrayRead(aij->A,&aa);CHKERRQ(ierr); 1276 ierr = MatSeqAIJRestoreArrayRead(aij->B,&ba);CHKERRQ(ierr); 1277 PetscCheckFalse(cnt != nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1278 ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 1279 ierr = PetscFree(matvals);CHKERRQ(ierr); 1280 1281 /* write block size option to the viewer's .info file */ 1282 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1283 PetscFunctionReturn(0); 1284 } 1285 1286 #include <petscdraw.h> 1287 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1288 { 1289 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1290 PetscErrorCode ierr; 1291 PetscMPIInt rank = aij->rank,size = aij->size; 1292 PetscBool isdraw,iascii,isbinary; 1293 PetscViewer sviewer; 1294 PetscViewerFormat format; 1295 1296 PetscFunctionBegin; 1297 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1298 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1299 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1300 if (iascii) { 1301 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1302 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1303 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1304 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1305 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1306 for (i=0; i<(PetscInt)size; i++) { 1307 nmax = PetscMax(nmax,nz[i]); 1308 nmin = PetscMin(nmin,nz[i]); 1309 navg += nz[i]; 1310 } 1311 ierr = PetscFree(nz);CHKERRQ(ierr); 1312 navg = navg/size; 1313 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n",nmin,navg,nmax);CHKERRQ(ierr); 1314 PetscFunctionReturn(0); 1315 } 1316 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1317 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1318 MatInfo info; 1319 PetscInt *inodes=NULL; 1320 1321 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr); 1322 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1323 ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr); 1324 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1325 if (!inodes) { 1326 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", 1327 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1328 } else { 1329 ierr 
= PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", 1330 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1331 } 1332 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1333 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1334 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1335 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1336 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1337 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1338 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1339 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1340 PetscFunctionReturn(0); 1341 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1342 PetscInt inodecount,inodelimit,*inodes; 1343 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1344 if (inodes) { 1345 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit);CHKERRQ(ierr); 1346 } else { 1347 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1348 } 1349 PetscFunctionReturn(0); 1350 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1351 PetscFunctionReturn(0); 1352 } 1353 } else if (isbinary) { 1354 if (size == 1) { 1355 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1356 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1357 } else { 1358 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1359 } 1360 PetscFunctionReturn(0); 1361 } else if (iascii && size == 1) { 1362 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1363 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1364 PetscFunctionReturn(0); 1365 } else if (isdraw) { 1366 PetscDraw draw; 1367 PetscBool isnull; 1368 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1369 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1370 if (isnull) PetscFunctionReturn(0); 1371 } 1372 1373 { /* assemble the entire matrix onto first processor */ 1374 Mat A = NULL, Av; 1375 IS isrow,iscol; 1376 1377 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1378 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1379 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1380 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1381 /* The commented code uses MatCreateSubMatrices instead */ 1382 /* 1383 Mat *AA, A = NULL, Av; 1384 IS isrow,iscol; 1385 1386 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1387 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? 
mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1388 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1389 if (rank == 0) { 1390 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1391 A = AA[0]; 1392 Av = AA[0]; 1393 } 1394 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1395 */ 1396 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1397 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1398 /* 1399 Everyone has to call to draw the matrix since the graphics waits are 1400 synchronized across all processors that share the PetscDraw object 1401 */ 1402 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1403 if (rank == 0) { 1404 if (((PetscObject)mat)->name) { 1405 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1406 } 1407 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1408 } 1409 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1410 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1411 ierr = MatDestroy(&A);CHKERRQ(ierr); 1412 } 1413 PetscFunctionReturn(0); 1414 } 1415 1416 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1417 { 1418 PetscErrorCode ierr; 1419 PetscBool iascii,isdraw,issocket,isbinary; 1420 1421 PetscFunctionBegin; 1422 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1423 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1424 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1425 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1426 if (iascii || isdraw || isbinary || issocket) { 1427 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1428 } 1429 PetscFunctionReturn(0); 1430 } 1431 1432 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1433 { 1434 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1435 PetscErrorCode ierr; 1436 Vec bb1 = NULL; 1437 PetscBool hasop; 1438 1439 PetscFunctionBegin; 1440 if (flag == SOR_APPLY_UPPER) { 1441 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1442 PetscFunctionReturn(0); 1443 } 1444 1445 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1446 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1447 } 1448 1449 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1450 if (flag & SOR_ZERO_INITIAL_GUESS) { 1451 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1452 its--; 1453 } 1454 1455 while (its--) { 1456 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1457 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1458 1459 /* update rhs: bb1 = bb - B*x */ 1460 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1461 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1462 1463 /* local sweep */ 1464 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1465 } 1466 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1467 if (flag & SOR_ZERO_INITIAL_GUESS) { 1468 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1469 its--; 1470 } 1471 while (its--) { 1472 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1473 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1474 1475 /* update rhs: bb1 = bb - B*x */ 1476 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1477 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1478 1479 /* local sweep */ 1480 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1481 } 1482 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1483 if (flag & SOR_ZERO_INITIAL_GUESS) { 1484 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1485 its--; 1486 } 1487 while (its--) { 1488 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1489 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1490 1491 /* update rhs: bb1 = bb - B*x */ 1492 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1493 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1494 1495 /* local sweep */ 1496 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1497 } 1498 } else if (flag & SOR_EISENSTAT) { 1499 Vec xx1; 1500 1501 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1502 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1503 1504 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1505 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1506 if (!mat->diag) { 1507 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1508 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1509 } 1510 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1511 if (hasop) { 1512 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1513 } else { 1514 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1515 } 1516 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1517 1518 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1519 1520 /* local sweep */ 1521 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1522 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1523 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1524 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1525 1526 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1527 1528 matin->factorerrortype = mat->A->factorerrortype; 1529 PetscFunctionReturn(0); 1530 } 1531 1532 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1533 { 1534 Mat aA,aB,Aperm; 1535 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1536 PetscScalar *aa,*ba; 1537 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1538 PetscSF rowsf,sf; 1539 IS parcolp = NULL; 1540 PetscBool done; 1541 PetscErrorCode ierr; 1542 1543 PetscFunctionBegin; 1544 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1545 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1546 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1547 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1548 1549 /* Invert row permutation to find out where my rows should go */ 1550 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1551 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1552 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 
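  /* One leaf per local row, each pointing at the old global row listed in rowp for that position; reducing the
     new global row numbers (work[] below) onto the roots therefore leaves rdest[k] holding the destination of
     old local row k, i.e. the inverse permutation.  MPI_REPLACE is safe here since a permutation pairs every
     root with exactly one leaf. */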
1553 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1554 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1555 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1556 1557 /* Invert column permutation to find out where my columns should go */ 1558 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1559 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1560 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1561 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1562 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1563 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1564 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1565 1566 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1567 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1568 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1569 1570 /* Find out where my gcols should go */ 1571 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1572 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1573 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1574 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1575 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1576 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr); 1577 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr); 1578 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1579 1580 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1581 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1582 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1583 for (i=0; i<m; i++) { 1584 PetscInt row = rdest[i]; 1585 PetscMPIInt rowner; 1586 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1587 for (j=ai[i]; j<ai[i+1]; j++) { 1588 PetscInt col = cdest[aj[j]]; 1589 PetscMPIInt cowner; 1590 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1591 if (rowner == cowner) dnnz[i]++; 1592 else onnz[i]++; 1593 } 1594 for (j=bi[i]; j<bi[i+1]; j++) { 1595 PetscInt col = gcdest[bj[j]]; 1596 PetscMPIInt cowner; 1597 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1598 if (rowner == cowner) dnnz[i]++; 1599 else onnz[i]++; 1600 } 1601 } 1602 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr); 1603 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr); 1604 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr); 1605 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr); 1606 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1607 1608 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1609 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1610 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1611 for (i=0; i<m; i++) { 1612 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1613 PetscInt j0,rowlen; 1614 rowlen = ai[i+1] - ai[i]; 1615 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1616 for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1617 ierr = 
MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1618 } 1619 rowlen = bi[i+1] - bi[i]; 1620 for (j0=j=0; j<rowlen; j0=j) { 1621 for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1622 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1623 } 1624 } 1625 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1626 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1627 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1628 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1629 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1630 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1631 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1632 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1633 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1634 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1635 *B = Aperm; 1636 PetscFunctionReturn(0); 1637 } 1638 1639 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1640 { 1641 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1642 PetscErrorCode ierr; 1643 1644 PetscFunctionBegin; 1645 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1646 if (ghosts) *ghosts = aij->garray; 1647 PetscFunctionReturn(0); 1648 } 1649 1650 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1651 { 1652 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1653 Mat A = mat->A,B = mat->B; 1654 PetscErrorCode ierr; 1655 PetscLogDouble isend[5],irecv[5]; 1656 1657 PetscFunctionBegin; 1658 info->block_size = 1.0; 1659 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1660 1661 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1662 isend[3] = info->memory; isend[4] = info->mallocs; 1663 1664 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1665 1666 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1667 isend[3] += info->memory; isend[4] += info->mallocs; 1668 if (flag == MAT_LOCAL) { 1669 info->nz_used = isend[0]; 1670 info->nz_allocated = isend[1]; 1671 info->nz_unneeded = isend[2]; 1672 info->memory = isend[3]; 1673 info->mallocs = isend[4]; 1674 } else if (flag == MAT_GLOBAL_MAX) { 1675 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr); 1676 1677 info->nz_used = irecv[0]; 1678 info->nz_allocated = irecv[1]; 1679 info->nz_unneeded = irecv[2]; 1680 info->memory = irecv[3]; 1681 info->mallocs = irecv[4]; 1682 } else if (flag == MAT_GLOBAL_SUM) { 1683 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr); 1684 1685 info->nz_used = irecv[0]; 1686 info->nz_allocated = irecv[1]; 1687 info->nz_unneeded = irecv[2]; 1688 info->memory = irecv[3]; 1689 info->mallocs = irecv[4]; 1690 } 1691 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1692 info->fill_ratio_needed = 0; 1693 info->factor_mallocs = 0; 1694 PetscFunctionReturn(0); 1695 } 1696 1697 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1698 { 1699 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1700 PetscErrorCode ierr; 1701 1702 PetscFunctionBegin; 1703 switch (op) { 1704 case MAT_NEW_NONZERO_LOCATIONS: 1705 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1706 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1707 case MAT_KEEP_NONZERO_PATTERN: 1708 case 
MAT_NEW_NONZERO_LOCATION_ERR: 1709 case MAT_USE_INODES: 1710 case MAT_IGNORE_ZERO_ENTRIES: 1711 case MAT_FORM_EXPLICIT_TRANSPOSE: 1712 MatCheckPreallocated(A,1); 1713 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1714 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1715 break; 1716 case MAT_ROW_ORIENTED: 1717 MatCheckPreallocated(A,1); 1718 a->roworiented = flg; 1719 1720 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1721 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1722 break; 1723 case MAT_FORCE_DIAGONAL_ENTRIES: 1724 case MAT_SORTED_FULL: 1725 ierr = PetscInfo(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1726 break; 1727 case MAT_IGNORE_OFF_PROC_ENTRIES: 1728 a->donotstash = flg; 1729 break; 1730 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1731 case MAT_SPD: 1732 case MAT_SYMMETRIC: 1733 case MAT_STRUCTURALLY_SYMMETRIC: 1734 case MAT_HERMITIAN: 1735 case MAT_SYMMETRY_ETERNAL: 1736 break; 1737 case MAT_SUBMAT_SINGLEIS: 1738 A->submat_singleis = flg; 1739 break; 1740 case MAT_STRUCTURE_ONLY: 1741 /* The option is handled directly by MatSetOption() */ 1742 break; 1743 default: 1744 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1745 } 1746 PetscFunctionReturn(0); 1747 } 1748 1749 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1750 { 1751 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1752 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1753 PetscErrorCode ierr; 1754 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1755 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1756 PetscInt *cmap,*idx_p; 1757 1758 PetscFunctionBegin; 1759 PetscCheckFalse(mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1760 mat->getrowactive = PETSC_TRUE; 1761 1762 if (!mat->rowvalues && (idx || v)) { 1763 /* 1764 allocate enough space to hold information from the longest row. 
1765 */ 1766 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1767 PetscInt max = 1,tmp; 1768 for (i=0; i<matin->rmap->n; i++) { 1769 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1770 if (max < tmp) max = tmp; 1771 } 1772 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1773 } 1774 1775 PetscCheckFalse(row < rstart || row >= rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1776 lrow = row - rstart; 1777 1778 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1779 if (!v) {pvA = NULL; pvB = NULL;} 1780 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1781 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1782 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1783 nztot = nzA + nzB; 1784 1785 cmap = mat->garray; 1786 if (v || idx) { 1787 if (nztot) { 1788 /* Sort by increasing column numbers, assuming A and B already sorted */ 1789 PetscInt imark = -1; 1790 if (v) { 1791 *v = v_p = mat->rowvalues; 1792 for (i=0; i<nzB; i++) { 1793 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1794 else break; 1795 } 1796 imark = i; 1797 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1798 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1799 } 1800 if (idx) { 1801 *idx = idx_p = mat->rowindices; 1802 if (imark > -1) { 1803 for (i=0; i<imark; i++) { 1804 idx_p[i] = cmap[cworkB[i]]; 1805 } 1806 } else { 1807 for (i=0; i<nzB; i++) { 1808 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1809 else break; 1810 } 1811 imark = i; 1812 } 1813 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1814 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1815 } 1816 } else { 1817 if (idx) *idx = NULL; 1818 if (v) *v = NULL; 1819 } 1820 } 1821 *nz = nztot; 1822 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1823 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1824 PetscFunctionReturn(0); 1825 } 1826 1827 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1828 { 1829 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1830 1831 PetscFunctionBegin; 1832 PetscCheckFalse(!aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1833 aij->getrowactive = PETSC_FALSE; 1834 PetscFunctionReturn(0); 1835 } 1836 1837 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1838 { 1839 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1840 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1841 PetscErrorCode ierr; 1842 PetscInt i,j,cstart = mat->cmap->rstart; 1843 PetscReal sum = 0.0; 1844 const MatScalar *v,*amata,*bmata; 1845 1846 PetscFunctionBegin; 1847 if (aij->size == 1) { 1848 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1849 } else { 1850 ierr = MatSeqAIJGetArrayRead(aij->A,&amata);CHKERRQ(ierr); 1851 ierr = MatSeqAIJGetArrayRead(aij->B,&bmata);CHKERRQ(ierr); 1852 if (type == NORM_FROBENIUS) { 1853 v = amata; 1854 for (i=0; i<amat->nz; i++) { 1855 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1856 } 1857 v = bmata; 1858 for (i=0; i<bmat->nz; i++) { 1859 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1860 } 1861 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1862 *norm = PetscSqrtReal(*norm); 1863 ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr); 1864 } else if (type == NORM_1) { /* max column norm */ 1865 PetscReal *tmp,*tmp2; 1866 
PetscInt *jj,*garray = aij->garray; 1867 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1868 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1869 *norm = 0.0; 1870 v = amata; jj = amat->j; 1871 for (j=0; j<amat->nz; j++) { 1872 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1873 } 1874 v = bmata; jj = bmat->j; 1875 for (j=0; j<bmat->nz; j++) { 1876 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1877 } 1878 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1879 for (j=0; j<mat->cmap->N; j++) { 1880 if (tmp2[j] > *norm) *norm = tmp2[j]; 1881 } 1882 ierr = PetscFree(tmp);CHKERRQ(ierr); 1883 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1884 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1885 } else if (type == NORM_INFINITY) { /* max row norm */ 1886 PetscReal ntemp = 0.0; 1887 for (j=0; j<aij->A->rmap->n; j++) { 1888 v = amata + amat->i[j]; 1889 sum = 0.0; 1890 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1891 sum += PetscAbsScalar(*v); v++; 1892 } 1893 v = bmata + bmat->i[j]; 1894 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1895 sum += PetscAbsScalar(*v); v++; 1896 } 1897 if (sum > ntemp) ntemp = sum; 1898 } 1899 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1900 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1901 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1902 ierr = MatSeqAIJRestoreArrayRead(aij->A,&amata);CHKERRQ(ierr); 1903 ierr = MatSeqAIJRestoreArrayRead(aij->B,&bmata);CHKERRQ(ierr); 1904 } 1905 PetscFunctionReturn(0); 1906 } 1907 1908 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1909 { 1910 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 1911 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 1912 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 1913 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 1914 PetscErrorCode ierr; 1915 Mat B,A_diag,*B_diag; 1916 const MatScalar *pbv,*bv; 1917 1918 PetscFunctionBegin; 1919 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1920 ai = Aloc->i; aj = Aloc->j; 1921 bi = Bloc->i; bj = Bloc->j; 1922 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1923 PetscInt *d_nnz,*g_nnz,*o_nnz; 1924 PetscSFNode *oloc; 1925 PETSC_UNUSED PetscSF sf; 1926 1927 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1928 /* compute d_nnz for preallocation */ 1929 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 1930 for (i=0; i<ai[ma]; i++) d_nnz[aj[i]]++; 1931 /* compute local off-diagonal contributions */ 1932 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 1933 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1934 /* map those to global */ 1935 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1936 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1937 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1938 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 1939 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1940 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1941 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1942 1943 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1944 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1945 ierr = 
MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1946 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1947 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1948 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1949 } else { 1950 B = *matout; 1951 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1952 } 1953 1954 b = (Mat_MPIAIJ*)B->data; 1955 A_diag = a->A; 1956 B_diag = &b->A; 1957 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 1958 A_diag_ncol = A_diag->cmap->N; 1959 B_diag_ilen = sub_B_diag->ilen; 1960 B_diag_i = sub_B_diag->i; 1961 1962 /* Set ilen for diagonal of B */ 1963 for (i=0; i<A_diag_ncol; i++) { 1964 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 1965 } 1966 1967 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 1968 very quickly (=without using MatSetValues), because all writes are local. */ 1969 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 1970 1971 /* copy over the B part */ 1972 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 1973 ierr = MatSeqAIJGetArrayRead(a->B,&bv);CHKERRQ(ierr); 1974 pbv = bv; 1975 row = A->rmap->rstart; 1976 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1977 cols_tmp = cols; 1978 for (i=0; i<mb; i++) { 1979 ncol = bi[i+1]-bi[i]; 1980 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES);CHKERRQ(ierr); 1981 row++; 1982 pbv += ncol; cols_tmp += ncol; 1983 } 1984 ierr = PetscFree(cols);CHKERRQ(ierr); 1985 ierr = MatSeqAIJRestoreArrayRead(a->B,&bv);CHKERRQ(ierr); 1986 1987 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1988 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1989 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1990 *matout = B; 1991 } else { 1992 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 1993 } 1994 PetscFunctionReturn(0); 1995 } 1996 1997 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1998 { 1999 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2000 Mat a = aij->A,b = aij->B; 2001 PetscErrorCode ierr; 2002 PetscInt s1,s2,s3; 2003 2004 PetscFunctionBegin; 2005 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2006 if (rr) { 2007 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2008 PetscCheckFalse(s1!=s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2009 /* Overlap communication with computation. 
*/ 2010 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2011 } 2012 if (ll) { 2013 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2014 PetscCheckFalse(s1!=s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2015 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 2016 } 2017 /* scale the diagonal block */ 2018 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2019 2020 if (rr) { 2021 /* Do a scatter end and then right scale the off-diagonal block */ 2022 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2023 ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr); 2024 } 2025 PetscFunctionReturn(0); 2026 } 2027 2028 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2029 { 2030 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2031 PetscErrorCode ierr; 2032 2033 PetscFunctionBegin; 2034 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2035 PetscFunctionReturn(0); 2036 } 2037 2038 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2039 { 2040 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2041 Mat a,b,c,d; 2042 PetscBool flg; 2043 PetscErrorCode ierr; 2044 2045 PetscFunctionBegin; 2046 a = matA->A; b = matA->B; 2047 c = matB->A; d = matB->B; 2048 2049 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2050 if (flg) { 2051 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2052 } 2053 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 2054 PetscFunctionReturn(0); 2055 } 2056 2057 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2058 { 2059 PetscErrorCode ierr; 2060 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2061 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2062 2063 PetscFunctionBegin; 2064 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2065 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2066 /* because of the column compression in the off-processor part of the matrix a->B, 2067 the number of columns in a->B and b->B may be different, hence we cannot call 2068 the MatCopy() directly on the two parts. If need be, we can provide a more 2069 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2070 then copying the submatrices */ 2071 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2072 } else { 2073 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2074 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2075 } 2076 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2077 PetscFunctionReturn(0); 2078 } 2079 2080 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2081 { 2082 PetscErrorCode ierr; 2083 2084 PetscFunctionBegin; 2085 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr); 2086 PetscFunctionReturn(0); 2087 } 2088 2089 /* 2090 Computes the number of nonzeros per row needed for preallocation when X and Y 2091 have different nonzero structure. 
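   The count for each row is a merge of the two sorted rows in global column numbering (through xltog/yltog),
   so a column present in both X and Y is counted only once.  For example, rows with global columns {1,4,7}
   and {4,5} give nnz = 4.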
2092 */ 2093 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2094 { 2095 PetscInt i,j,k,nzx,nzy; 2096 2097 PetscFunctionBegin; 2098 /* Set the number of nonzeros in the new matrix */ 2099 for (i=0; i<m; i++) { 2100 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2101 nzx = xi[i+1] - xi[i]; 2102 nzy = yi[i+1] - yi[i]; 2103 nnz[i] = 0; 2104 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2105 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2106 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2107 nnz[i]++; 2108 } 2109 for (; k<nzy; k++) nnz[i]++; 2110 } 2111 PetscFunctionReturn(0); 2112 } 2113 2114 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2115 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2116 { 2117 PetscErrorCode ierr; 2118 PetscInt m = Y->rmap->N; 2119 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2120 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2121 2122 PetscFunctionBegin; 2123 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2124 PetscFunctionReturn(0); 2125 } 2126 2127 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2128 { 2129 PetscErrorCode ierr; 2130 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2131 2132 PetscFunctionBegin; 2133 if (str == SAME_NONZERO_PATTERN) { 2134 ierr = MatAXPY(yy->A,a,xx->A,str);CHKERRQ(ierr); 2135 ierr = MatAXPY(yy->B,a,xx->B,str);CHKERRQ(ierr); 2136 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2137 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2138 } else { 2139 Mat B; 2140 PetscInt *nnz_d,*nnz_o; 2141 2142 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2143 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2144 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2145 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2146 ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr); 2147 ierr = MatSetType(B,((PetscObject)Y)->type_name);CHKERRQ(ierr); 2148 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2149 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2150 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2151 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2152 ierr = MatHeaderMerge(Y,&B);CHKERRQ(ierr); 2153 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2154 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2155 } 2156 PetscFunctionReturn(0); 2157 } 2158 2159 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2160 2161 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2162 { 2163 #if defined(PETSC_USE_COMPLEX) 2164 PetscErrorCode ierr; 2165 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2166 2167 PetscFunctionBegin; 2168 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2169 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2170 #else 2171 PetscFunctionBegin; 2172 #endif 2173 PetscFunctionReturn(0); 2174 } 2175 2176 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2177 { 2178 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2179 PetscErrorCode ierr; 2180 2181 PetscFunctionBegin; 2182 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2183 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2184 
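  /* the diagonal (A) and off-diagonal (B) blocks together hold every locally stored entry, so both are processed */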
PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*idxb = NULL,m = A->rmap->n;
  PetscScalar       *va,*vv;
  Vec               vB,vA;
  const PetscScalar *vb;

  PetscFunctionBegin;
  /* row-wise maxima of the diagonal block; the returned column indices are converted to global numbering below */
  ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
  ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);

  ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<m; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  /* row-wise maxima of the off-diagonal block; its indices are local to B and translated through garray when merged */
  ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
  ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);
  ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);

  /* take, per row, the larger of the two; ties keep the smaller global column index */
  ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
  ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      vv[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    } else {
      vv[i] = va[i];
      if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
        idx[i] = a->garray[idxb[i]];
    }
  }
  ierr = VecRestoreArrayWrite(v,&vv);CHKERRQ(ierr); /* vv was obtained from v */
  ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vA);CHKERRQ(ierr);
  ierr = VecDestroy(&vB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  PetscErrorCode    ierr;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
    ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr);
    ierr = VecDestroy(&diagV);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (n == 0) {
    if (m) {
      ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
      for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
      ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr);

  /* Get offdiagIdx[] for implicit 0.0 */
  ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
  ba = bav;
  bi = b->i;
  bj = b->j;
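  /* Scan each row of the column-compressed off-diagonal block below: a row that is not dense in the
     off-diagonal columns has an implicit zero, so its minimum absolute value is 0.0 and offdiagIdx[] records
     the global column of the first such hole; a dense row keeps its smallest stored |entry|, with the column
     translated through cmap (= garray) to global numbering. */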
2286 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2287 for (r = 0; r < m; r++) { 2288 ncols = bi[r+1] - bi[r]; 2289 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2290 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2291 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2292 offdiagA[r] = 0.0; 2293 2294 /* Find first hole in the cmap */ 2295 for (j=0; j<ncols; j++) { 2296 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2297 if (col > j && j < cstart) { 2298 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2299 break; 2300 } else if (col > j + n && j >= cstart) { 2301 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2302 break; 2303 } 2304 } 2305 if (j == ncols && ncols < A->cmap->N - n) { 2306 /* a hole is outside compressed Bcols */ 2307 if (ncols == 0) { 2308 if (cstart) { 2309 offdiagIdx[r] = 0; 2310 } else offdiagIdx[r] = cend; 2311 } else { /* ncols > 0 */ 2312 offdiagIdx[r] = cmap[ncols-1] + 1; 2313 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2314 } 2315 } 2316 } 2317 2318 for (j=0; j<ncols; j++) { 2319 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2320 ba++; bj++; 2321 } 2322 } 2323 2324 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2325 ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2326 for (r = 0; r < m; ++r) { 2327 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2328 a[r] = diagA[r]; 2329 if (idx) idx[r] = cstart + diagIdx[r]; 2330 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2331 a[r] = diagA[r]; 2332 if (idx) { 2333 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2334 idx[r] = cstart + diagIdx[r]; 2335 } else idx[r] = offdiagIdx[r]; 2336 } 2337 } else { 2338 a[r] = offdiagA[r]; 2339 if (idx) idx[r] = offdiagIdx[r]; 2340 } 2341 } 2342 ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr); 2343 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2344 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2345 ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2346 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2347 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2348 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2349 PetscFunctionReturn(0); 2350 } 2351 2352 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2353 { 2354 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2355 PetscInt m = A->rmap->n,n = A->cmap->n; 2356 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2357 PetscInt *cmap = mat->garray; 2358 PetscInt *diagIdx, *offdiagIdx; 2359 Vec diagV, offdiagV; 2360 PetscScalar *a, *diagA, *offdiagA; 2361 const PetscScalar *ba,*bav; 2362 PetscInt r,j,col,ncols,*bi,*bj; 2363 PetscErrorCode ierr; 2364 Mat B = mat->B; 2365 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2366 2367 PetscFunctionBegin; 2368 /* When a process holds entire A and other processes have no entry */ 2369 if (A->cmap->N == n) { 2370 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2371 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2372 ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr); 2373 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2374 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2375 PetscFunctionReturn(0); 2376 } else if (n == 0) { 2377 if (m) { 2378 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2379 for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;} 2380 ierr = 
VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2381 } 2382 PetscFunctionReturn(0); 2383 } 2384 2385 ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2386 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2387 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2388 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2389 2390 /* Get offdiagIdx[] for implicit 0.0 */ 2391 ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr); 2392 ba = bav; 2393 bi = b->i; 2394 bj = b->j; 2395 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2396 for (r = 0; r < m; r++) { 2397 ncols = bi[r+1] - bi[r]; 2398 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2399 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2400 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2401 offdiagA[r] = 0.0; 2402 2403 /* Find first hole in the cmap */ 2404 for (j=0; j<ncols; j++) { 2405 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2406 if (col > j && j < cstart) { 2407 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2408 break; 2409 } else if (col > j + n && j >= cstart) { 2410 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2411 break; 2412 } 2413 } 2414 if (j == ncols && ncols < A->cmap->N - n) { 2415 /* a hole is outside compressed Bcols */ 2416 if (ncols == 0) { 2417 if (cstart) { 2418 offdiagIdx[r] = 0; 2419 } else offdiagIdx[r] = cend; 2420 } else { /* ncols > 0 */ 2421 offdiagIdx[r] = cmap[ncols-1] + 1; 2422 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2423 } 2424 } 2425 } 2426 2427 for (j=0; j<ncols; j++) { 2428 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2429 ba++; bj++; 2430 } 2431 } 2432 2433 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2434 ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2435 for (r = 0; r < m; ++r) { 2436 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2437 a[r] = diagA[r]; 2438 if (idx) idx[r] = cstart + diagIdx[r]; 2439 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2440 a[r] = diagA[r]; 2441 if (idx) { 2442 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2443 idx[r] = cstart + diagIdx[r]; 2444 } else idx[r] = offdiagIdx[r]; 2445 } 2446 } else { 2447 a[r] = offdiagA[r]; 2448 if (idx) idx[r] = offdiagIdx[r]; 2449 } 2450 } 2451 ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr); 2452 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2453 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2454 ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2455 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2456 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2457 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2458 PetscFunctionReturn(0); 2459 } 2460 2461 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2462 { 2463 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 2464 PetscInt m = A->rmap->n,n = A->cmap->n; 2465 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2466 PetscInt *cmap = mat->garray; 2467 PetscInt *diagIdx, *offdiagIdx; 2468 Vec diagV, offdiagV; 2469 PetscScalar *a, *diagA, *offdiagA; 2470 const PetscScalar *ba,*bav; 2471 PetscInt r,j,col,ncols,*bi,*bj; 2472 PetscErrorCode ierr; 2473 Mat B = mat->B; 2474 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2475 2476 PetscFunctionBegin; 2477 /* When a process holds entire A and other processes have no entry */ 2478 if (A->cmap->N == n) { 2479 ierr = 
VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2480 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2481 ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr); 2482 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2483 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2484 PetscFunctionReturn(0); 2485 } else if (n == 0) { 2486 if (m) { 2487 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2488 for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;} 2489 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2490 } 2491 PetscFunctionReturn(0); 2492 } 2493 2494 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2495 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2496 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2497 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2498 2499 /* Get offdiagIdx[] for implicit 0.0 */ 2500 ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr); 2501 ba = bav; 2502 bi = b->i; 2503 bj = b->j; 2504 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2505 for (r = 0; r < m; r++) { 2506 ncols = bi[r+1] - bi[r]; 2507 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2508 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2509 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2510 offdiagA[r] = 0.0; 2511 2512 /* Find first hole in the cmap */ 2513 for (j=0; j<ncols; j++) { 2514 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2515 if (col > j && j < cstart) { 2516 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2517 break; 2518 } else if (col > j + n && j >= cstart) { 2519 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2520 break; 2521 } 2522 } 2523 if (j == ncols && ncols < A->cmap->N - n) { 2524 /* a hole is outside compressed Bcols */ 2525 if (ncols == 0) { 2526 if (cstart) { 2527 offdiagIdx[r] = 0; 2528 } else offdiagIdx[r] = cend; 2529 } else { /* ncols > 0 */ 2530 offdiagIdx[r] = cmap[ncols-1] + 1; 2531 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2532 } 2533 } 2534 } 2535 2536 for (j=0; j<ncols; j++) { 2537 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2538 ba++; bj++; 2539 } 2540 } 2541 2542 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2543 ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr); 2544 for (r = 0; r < m; ++r) { 2545 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2546 a[r] = diagA[r]; 2547 if (idx) idx[r] = cstart + diagIdx[r]; 2548 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2549 a[r] = diagA[r]; 2550 if (idx) { 2551 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2552 idx[r] = cstart + diagIdx[r]; 2553 } else idx[r] = offdiagIdx[r]; 2554 } 2555 } else { 2556 a[r] = offdiagA[r]; 2557 if (idx) idx[r] = offdiagIdx[r]; 2558 } 2559 } 2560 ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr); 2561 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2562 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2563 ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr); 2564 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2565 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2566 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2567 PetscFunctionReturn(0); 2568 } 2569 2570 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2571 { 2572 PetscErrorCode ierr; 2573 Mat *dummy; 2574 2575 PetscFunctionBegin; 2576 ierr = 
MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2577 *newmat = *dummy; 2578 ierr = PetscFree(dummy);CHKERRQ(ierr); 2579 PetscFunctionReturn(0); 2580 } 2581 2582 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2583 { 2584 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2585 PetscErrorCode ierr; 2586 2587 PetscFunctionBegin; 2588 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2589 A->factorerrortype = a->A->factorerrortype; 2590 PetscFunctionReturn(0); 2591 } 2592 2593 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2594 { 2595 PetscErrorCode ierr; 2596 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2597 2598 PetscFunctionBegin; 2599 PetscCheckFalse(!x->assembled && !x->preallocated,PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2600 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2601 if (x->assembled) { 2602 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2603 } else { 2604 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2605 } 2606 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2607 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2608 PetscFunctionReturn(0); 2609 } 2610 2611 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2612 { 2613 PetscFunctionBegin; 2614 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2615 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2616 PetscFunctionReturn(0); 2617 } 2618 2619 /*@ 2620 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2621 2622 Collective on Mat 2623 2624 Input Parameters: 2625 + A - the matrix 2626 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2627 2628 Level: advanced 2629 2630 @*/ 2631 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2632 { 2633 PetscErrorCode ierr; 2634 2635 PetscFunctionBegin; 2636 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2637 PetscFunctionReturn(0); 2638 } 2639 2640 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2641 { 2642 PetscErrorCode ierr; 2643 PetscBool sc = PETSC_FALSE,flg; 2644 2645 PetscFunctionBegin; 2646 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2647 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2648 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2649 if (flg) { 2650 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2651 } 2652 ierr = PetscOptionsTail();CHKERRQ(ierr); 2653 PetscFunctionReturn(0); 2654 } 2655 2656 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2657 { 2658 PetscErrorCode ierr; 2659 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2660 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2661 2662 PetscFunctionBegin; 2663 if (!Y->preallocated) { 2664 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2665 } else if (!aij->nz) { 2666 PetscInt nonew = aij->nonew; 2667 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2668 aij->nonew = nonew; 2669 } 2670 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2671 
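  /* If Y had no preallocated nonzeros, the code above first reserves one entry per local row of the diagonal
     block, so MatShift_Basic() (which adds a to every locally owned diagonal entry) does not trigger a
     new-nonzero allocation error. */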
PetscFunctionReturn(0); 2672 } 2673 2674 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2675 { 2676 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2677 PetscErrorCode ierr; 2678 2679 PetscFunctionBegin; 2680 PetscCheckFalse(A->rmap->n != A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2681 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2682 if (d) { 2683 PetscInt rstart; 2684 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2685 *d += rstart; 2686 2687 } 2688 PetscFunctionReturn(0); 2689 } 2690 2691 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2692 { 2693 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2694 PetscErrorCode ierr; 2695 2696 PetscFunctionBegin; 2697 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2698 PetscFunctionReturn(0); 2699 } 2700 2701 /* -------------------------------------------------------------------*/ 2702 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2703 MatGetRow_MPIAIJ, 2704 MatRestoreRow_MPIAIJ, 2705 MatMult_MPIAIJ, 2706 /* 4*/ MatMultAdd_MPIAIJ, 2707 MatMultTranspose_MPIAIJ, 2708 MatMultTransposeAdd_MPIAIJ, 2709 NULL, 2710 NULL, 2711 NULL, 2712 /*10*/ NULL, 2713 NULL, 2714 NULL, 2715 MatSOR_MPIAIJ, 2716 MatTranspose_MPIAIJ, 2717 /*15*/ MatGetInfo_MPIAIJ, 2718 MatEqual_MPIAIJ, 2719 MatGetDiagonal_MPIAIJ, 2720 MatDiagonalScale_MPIAIJ, 2721 MatNorm_MPIAIJ, 2722 /*20*/ MatAssemblyBegin_MPIAIJ, 2723 MatAssemblyEnd_MPIAIJ, 2724 MatSetOption_MPIAIJ, 2725 MatZeroEntries_MPIAIJ, 2726 /*24*/ MatZeroRows_MPIAIJ, 2727 NULL, 2728 NULL, 2729 NULL, 2730 NULL, 2731 /*29*/ MatSetUp_MPIAIJ, 2732 NULL, 2733 NULL, 2734 MatGetDiagonalBlock_MPIAIJ, 2735 NULL, 2736 /*34*/ MatDuplicate_MPIAIJ, 2737 NULL, 2738 NULL, 2739 NULL, 2740 NULL, 2741 /*39*/ MatAXPY_MPIAIJ, 2742 MatCreateSubMatrices_MPIAIJ, 2743 MatIncreaseOverlap_MPIAIJ, 2744 MatGetValues_MPIAIJ, 2745 MatCopy_MPIAIJ, 2746 /*44*/ MatGetRowMax_MPIAIJ, 2747 MatScale_MPIAIJ, 2748 MatShift_MPIAIJ, 2749 MatDiagonalSet_MPIAIJ, 2750 MatZeroRowsColumns_MPIAIJ, 2751 /*49*/ MatSetRandom_MPIAIJ, 2752 NULL, 2753 NULL, 2754 NULL, 2755 NULL, 2756 /*54*/ MatFDColoringCreate_MPIXAIJ, 2757 NULL, 2758 MatSetUnfactored_MPIAIJ, 2759 MatPermute_MPIAIJ, 2760 NULL, 2761 /*59*/ MatCreateSubMatrix_MPIAIJ, 2762 MatDestroy_MPIAIJ, 2763 MatView_MPIAIJ, 2764 NULL, 2765 NULL, 2766 /*64*/ NULL, 2767 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2768 NULL, 2769 NULL, 2770 NULL, 2771 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2772 MatGetRowMinAbs_MPIAIJ, 2773 NULL, 2774 NULL, 2775 NULL, 2776 NULL, 2777 /*75*/ MatFDColoringApply_AIJ, 2778 MatSetFromOptions_MPIAIJ, 2779 NULL, 2780 NULL, 2781 MatFindZeroDiagonals_MPIAIJ, 2782 /*80*/ NULL, 2783 NULL, 2784 NULL, 2785 /*83*/ MatLoad_MPIAIJ, 2786 MatIsSymmetric_MPIAIJ, 2787 NULL, 2788 NULL, 2789 NULL, 2790 NULL, 2791 /*89*/ NULL, 2792 NULL, 2793 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2794 NULL, 2795 NULL, 2796 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2797 NULL, 2798 NULL, 2799 NULL, 2800 MatBindToCPU_MPIAIJ, 2801 /*99*/ MatProductSetFromOptions_MPIAIJ, 2802 NULL, 2803 NULL, 2804 MatConjugate_MPIAIJ, 2805 NULL, 2806 /*104*/MatSetValuesRow_MPIAIJ, 2807 MatRealPart_MPIAIJ, 2808 MatImaginaryPart_MPIAIJ, 2809 NULL, 2810 NULL, 2811 /*109*/NULL, 2812 NULL, 2813 MatGetRowMin_MPIAIJ, 2814 NULL, 2815 MatMissingDiagonal_MPIAIJ, 2816 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2817 NULL, 2818 MatGetGhosts_MPIAIJ, 2819 NULL, 2820 NULL, 2821 /*119*/MatMultDiagonalBlock_MPIAIJ, 
2822 NULL, 2823 NULL, 2824 NULL, 2825 MatGetMultiProcBlock_MPIAIJ, 2826 /*124*/MatFindNonzeroRows_MPIAIJ, 2827 MatGetColumnReductions_MPIAIJ, 2828 MatInvertBlockDiagonal_MPIAIJ, 2829 MatInvertVariableBlockDiagonal_MPIAIJ, 2830 MatCreateSubMatricesMPI_MPIAIJ, 2831 /*129*/NULL, 2832 NULL, 2833 NULL, 2834 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2835 NULL, 2836 /*134*/NULL, 2837 NULL, 2838 NULL, 2839 NULL, 2840 NULL, 2841 /*139*/MatSetBlockSizes_MPIAIJ, 2842 NULL, 2843 NULL, 2844 MatFDColoringSetUp_MPIXAIJ, 2845 MatFindOffBlockDiagonalEntries_MPIAIJ, 2846 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2847 /*145*/NULL, 2848 NULL, 2849 NULL 2850 }; 2851 2852 /* ----------------------------------------------------------------------------------------*/ 2853 2854 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2855 { 2856 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2857 PetscErrorCode ierr; 2858 2859 PetscFunctionBegin; 2860 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2861 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2862 PetscFunctionReturn(0); 2863 } 2864 2865 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2866 { 2867 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2868 PetscErrorCode ierr; 2869 2870 PetscFunctionBegin; 2871 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2872 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2873 PetscFunctionReturn(0); 2874 } 2875 2876 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2877 { 2878 Mat_MPIAIJ *b; 2879 PetscErrorCode ierr; 2880 PetscMPIInt size; 2881 2882 PetscFunctionBegin; 2883 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2884 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2885 b = (Mat_MPIAIJ*)B->data; 2886 2887 #if defined(PETSC_USE_CTABLE) 2888 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2889 #else 2890 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2891 #endif 2892 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2893 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2894 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2895 2896 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2897 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 2898 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2899 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2900 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 2901 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2902 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2903 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2904 2905 if (!B->preallocated) { 2906 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2907 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2908 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2909 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2910 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2911 } 2912 2913 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2914 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2915 B->preallocated = PETSC_TRUE; 2916 B->was_assembled = PETSC_FALSE; 2917 B->assembled = PETSC_FALSE; 2918 PetscFunctionReturn(0); 2919 } 2920 2921 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2922 { 2923 Mat_MPIAIJ *b; 2924 PetscErrorCode ierr; 2925 2926 PetscFunctionBegin; 2927 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2928 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2929 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2930 b = (Mat_MPIAIJ*)B->data; 2931 2932 #if defined(PETSC_USE_CTABLE) 2933 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2934 #else 2935 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2936 #endif 2937 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2938 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2939 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2940 2941 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2942 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2943 B->preallocated = PETSC_TRUE; 2944 B->was_assembled = PETSC_FALSE; 2945 B->assembled = PETSC_FALSE; 2946 PetscFunctionReturn(0); 2947 } 2948 2949 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2950 { 2951 Mat mat; 2952 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2953 PetscErrorCode ierr; 2954 2955 PetscFunctionBegin; 2956 *newmat = NULL; 2957 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2958 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2959 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2960 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2961 a = (Mat_MPIAIJ*)mat->data; 2962 2963 mat->factortype = matin->factortype; 2964 mat->assembled = matin->assembled; 2965 mat->insertmode = NOT_SET_VALUES; 2966 mat->preallocated = matin->preallocated; 2967 2968 a->size = oldmat->size; 2969 a->rank = oldmat->rank; 2970 a->donotstash = oldmat->donotstash; 2971 a->roworiented = oldmat->roworiented; 2972 a->rowindices = NULL; 2973 a->rowvalues = NULL; 2974 a->getrowactive = PETSC_FALSE; 2975 2976 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2977 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2978 2979 if (oldmat->colmap) { 2980 #if defined(PETSC_USE_CTABLE) 2981 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2982 #else 2983 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2984 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2985 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 2986 #endif 2987 } else a->colmap = NULL; 2988 if (oldmat->garray) { 2989 PetscInt len; 2990 len = oldmat->B->cmap->n; 2991 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2992 
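    /* garray[] translates the compressed local column indices of the off-diagonal block into global column
       numbers; copying it lets the duplicate reuse the map instead of rebuilding it at assembly time */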
ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2993 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 2994 } else a->garray = NULL; 2995 2996 /* It may happen MatDuplicate is called with a non-assembled matrix 2997 In fact, MatDuplicate only requires the matrix to be preallocated 2998 This may happen inside a DMCreateMatrix_Shell */ 2999 if (oldmat->lvec) { 3000 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3001 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3002 } 3003 if (oldmat->Mvctx) { 3004 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3005 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3006 } 3007 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3008 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3009 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3010 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3011 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3012 *newmat = mat; 3013 PetscFunctionReturn(0); 3014 } 3015 3016 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3017 { 3018 PetscBool isbinary, ishdf5; 3019 PetscErrorCode ierr; 3020 3021 PetscFunctionBegin; 3022 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 3023 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 3024 /* force binary viewer to load .info file if it has not yet done so */ 3025 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3026 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 3027 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 3028 if (isbinary) { 3029 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 3030 } else if (ishdf5) { 3031 #if defined(PETSC_HAVE_HDF5) 3032 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 3033 #else 3034 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3035 #endif 3036 } else { 3037 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 3038 } 3039 PetscFunctionReturn(0); 3040 } 3041 3042 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3043 { 3044 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 3045 PetscInt *rowidxs,*colidxs; 3046 PetscScalar *matvals; 3047 PetscErrorCode ierr; 3048 3049 PetscFunctionBegin; 3050 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3051 3052 /* read in matrix header */ 3053 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3054 PetscCheckFalse(header[0] != MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3055 M = header[1]; N = header[2]; nz = header[3]; 3056 PetscCheckFalse(M < 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M); 3057 PetscCheckFalse(N < 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N); 3058 PetscCheckFalse(nz < 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 3059 3060 /* set block sizes from 
the viewer's .info file */ 3061 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3062 /* set global sizes if not set already */ 3063 if (mat->rmap->N < 0) mat->rmap->N = M; 3064 if (mat->cmap->N < 0) mat->cmap->N = N; 3065 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3066 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3067 3068 /* check if the matrix sizes are correct */ 3069 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3070 PetscCheckFalse(M != rows || N != cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols); 3071 3072 /* read in row lengths and build row indices */ 3073 ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr); 3074 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3075 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 3076 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3077 ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr); 3078 PetscCheckFalse(sum != nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum); 3079 /* read in column indices and matrix values */ 3080 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 3081 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 3082 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 3083 /* store matrix indices and values */ 3084 ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr); 3085 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3086 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3087 PetscFunctionReturn(0); 3088 } 3089 3090 /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 3091 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3092 { 3093 PetscErrorCode ierr; 3094 IS iscol_local; 3095 PetscBool isstride; 3096 PetscMPIInt lisstride=0,gisstride; 3097 3098 PetscFunctionBegin; 3099 /* check if we are grabbing all columns*/ 3100 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3101 3102 if (isstride) { 3103 PetscInt start,len,mstart,mlen; 3104 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3105 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3106 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3107 if (mstart == start && mlen-mstart == len) lisstride = 1; 3108 } 3109 3110 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 3111 if (gisstride) { 3112 PetscInt N; 3113 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3114 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr); 3115 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3116 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3117 } else { 3118 PetscInt cbs; 3119 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3120 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3121 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3122 } 3123 3124 *isseq = iscol_local; 3125 PetscFunctionReturn(0); 3126 } 3127 3128 /* 3129 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3130 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3131 3132 Input Parameters: 3133 mat - matrix 3134 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3135 i.e., mat->rstart <= isrow[i] < mat->rend 3136 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3137 i.e., mat->cstart <= iscol[i] < mat->cend 3138 Output Parameter: 3139 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3140 iscol_o - sequential column index set for retrieving mat->B 3141 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3142 */ 3143 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3144 { 3145 PetscErrorCode ierr; 3146 Vec x,cmap; 3147 const PetscInt *is_idx; 3148 PetscScalar *xarray,*cmaparray; 3149 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3150 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3151 Mat B=a->B; 3152 Vec lvec=a->lvec,lcmap; 3153 PetscInt i,cstart,cend,Bn=B->cmap->N; 3154 MPI_Comm comm; 3155 VecScatter Mvctx=a->Mvctx; 3156 3157 PetscFunctionBegin; 3158 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3159 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3160 3161 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3162 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3163 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3164 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3165 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3166 3167 /* Get start indices */ 3168 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3169 isstart -= ncols; 3170 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3171 3172 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3173 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3174 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3175 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3176 for (i=0; i<ncols; i++) { 3177 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3178 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3179 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3180 } 3181 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3182 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3183 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3184 3185 /* Get iscol_d */ 3186 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3187 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3188 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3189 3190 /* Get isrow_d */ 3191 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3192 rstart = mat->rmap->rstart; 3193 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3194 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3195 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3196 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3197 3198 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3199 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3200 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3201 3202 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3203 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3204 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3205 3206 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3207 3208 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3209 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3210 3211 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3212 /* off-process column indices */ 3213 count = 0; 3214 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3215 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3216 3217 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3218 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3219 for (i=0; i<Bn; i++) { 3220 if (PetscRealPart(xarray[i]) > -1.0) { 3221 idx[count] = i; /* local column index in off-diagonal part B */ 3222 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3223 count++; 3224 } 3225 } 3226 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3227 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3228 3229 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3230 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3231 3232 ierr = PetscFree(idx);CHKERRQ(ierr); 3233 *garray = cmap1; 3234 3235 ierr = VecDestroy(&x);CHKERRQ(ierr); 3236 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3237 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3238 PetscFunctionReturn(0); 3239 } 3240 3241 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3242 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3243 { 3244 PetscErrorCode ierr; 3245 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3246 Mat M = NULL; 3247 MPI_Comm comm; 3248 IS iscol_d,isrow_d,iscol_o; 3249 Mat Asub = NULL,Bsub = NULL; 3250 PetscInt n; 3251 3252 PetscFunctionBegin; 3253 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3254 3255 if (call == MAT_REUSE_MATRIX) { 3256 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3257 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3258 PetscCheckFalse(!isrow_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3259 3260 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3261 PetscCheckFalse(!iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3262 3263 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3264 PetscCheckFalse(!iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3265 3266 /* Update diagonal and off-diagonal portions of submat */ 3267 asub = (Mat_MPIAIJ*)(*submat)->data; 3268 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3269 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3270 if (n) { 3271 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3272 } 3273 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3274 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3275 3276 } else { /* call == MAT_INITIAL_MATRIX) */ 3277 const PetscInt *garray; 3278 PetscInt BsubN; 3279 3280 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3281 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3282 3283 /* Create local submatrices Asub and Bsub */ 3284 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3285 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3286 3287 /* Create submatrix M */ 3288 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3289 3290 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3291 asub = (Mat_MPIAIJ*)M->data; 3292 3293 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3294 n = asub->B->cmap->N; 3295 if (BsubN > n) { 3296 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3297 const PetscInt *idx; 3298 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3299 ierr = PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3300 3301 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3302 j = 0; 3303 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3304 for (i=0; i<n; i++) { 3305 if (j >= BsubN) break; 3306 while (subgarray[i] > garray[j]) j++; 3307 3308 if (subgarray[i] == garray[j]) { 3309 idx_new[i] = idx[j++]; 3310 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]); 3311 } 3312 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3313 3314 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3315 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3316 3317 } else if (BsubN < n) { 3318 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N); 3319 } 3320 3321 ierr = PetscFree(garray);CHKERRQ(ierr); 3322 *submat = M; 3323 3324 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3325 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3326 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3327 3328 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3329 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3330 3331 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3332 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3333 } 3334 PetscFunctionReturn(0); 3335 } 3336 3337 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3338 { 3339 PetscErrorCode ierr; 3340 IS iscol_local=NULL,isrow_d; 3341 PetscInt csize; 3342 PetscInt n,i,j,start,end; 3343 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3344 MPI_Comm comm; 3345 3346 PetscFunctionBegin; 3347 /* If isrow has same processor distribution as mat, 3348 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3349 if (call == MAT_REUSE_MATRIX) { 3350 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3351 if (isrow_d) { 3352 sameRowDist = PETSC_TRUE; 3353 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3354 } else { 3355 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3356 if (iscol_local) { 3357 sameRowDist = PETSC_TRUE; 3358 tsameDist[1] = PETSC_FALSE; /* 
!sameColDist */ 3359 } 3360 } 3361 } else { 3362 /* Check if isrow has same processor distribution as mat */ 3363 sameDist[0] = PETSC_FALSE; 3364 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3365 if (!n) { 3366 sameDist[0] = PETSC_TRUE; 3367 } else { 3368 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3369 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3370 if (i >= start && j < end) { 3371 sameDist[0] = PETSC_TRUE; 3372 } 3373 } 3374 3375 /* Check if iscol has same processor distribution as mat */ 3376 sameDist[1] = PETSC_FALSE; 3377 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3378 if (!n) { 3379 sameDist[1] = PETSC_TRUE; 3380 } else { 3381 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3382 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3383 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3384 } 3385 3386 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3387 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRMPI(ierr); 3388 sameRowDist = tsameDist[0]; 3389 } 3390 3391 if (sameRowDist) { 3392 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3393 /* isrow and iscol have same processor distribution as mat */ 3394 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3395 PetscFunctionReturn(0); 3396 } else { /* sameRowDist */ 3397 /* isrow has same processor distribution as mat */ 3398 if (call == MAT_INITIAL_MATRIX) { 3399 PetscBool sorted; 3400 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3401 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3402 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3403 PetscCheckFalse(n != i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i); 3404 3405 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3406 if (sorted) { 3407 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3408 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3409 PetscFunctionReturn(0); 3410 } 3411 } else { /* call == MAT_REUSE_MATRIX */ 3412 IS iscol_sub; 3413 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3414 if (iscol_sub) { 3415 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3416 PetscFunctionReturn(0); 3417 } 3418 } 3419 } 3420 } 3421 3422 /* General case: iscol -> iscol_local which has global size of iscol */ 3423 if (call == MAT_REUSE_MATRIX) { 3424 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3425 PetscCheckFalse(!iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3426 } else { 3427 if (!iscol_local) { 3428 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3429 } 3430 } 3431 3432 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3433 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3434 3435 if (call == MAT_INITIAL_MATRIX) { 3436 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3437 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3438 } 3439 PetscFunctionReturn(0); 3440 } 3441 3442 /*@C 3443 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 
3444 and "off-diagonal" part of the matrix in CSR format. 3445 3446 Collective 3447 3448 Input Parameters: 3449 + comm - MPI communicator 3450 . A - "diagonal" portion of matrix 3451 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3452 - garray - global index of B columns 3453 3454 Output Parameter: 3455 . mat - the matrix, with input A as its local diagonal matrix 3456 Level: advanced 3457 3458 Notes: 3459 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3460 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3461 3462 .seealso: MatCreateMPIAIJWithSplitArrays() 3463 @*/ 3464 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3465 { 3466 PetscErrorCode ierr; 3467 Mat_MPIAIJ *maij; 3468 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3469 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3470 const PetscScalar *oa; 3471 Mat Bnew; 3472 PetscInt m,n,N; 3473 3474 PetscFunctionBegin; 3475 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3476 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3477 PetscCheckFalse(m != B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N); 3478 PetscCheckFalse(A->rmap->bs != B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs); 3479 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3480 /* PetscCheckFalse(A->cmap->bs != B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */ 3481 3482 /* Get global columns of mat */ 3483 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3484 3485 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3486 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3487 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3488 maij = (Mat_MPIAIJ*)(*mat)->data; 3489 3490 (*mat)->preallocated = PETSC_TRUE; 3491 3492 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3493 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3494 3495 /* Set A as diagonal portion of *mat */ 3496 maij->A = A; 3497 3498 nz = oi[m]; 3499 for (i=0; i<nz; i++) { 3500 col = oj[i]; 3501 oj[i] = garray[col]; 3502 } 3503 3504 /* Set Bnew as off-diagonal portion of *mat */ 3505 ierr = MatSeqAIJGetArrayRead(B,&oa);CHKERRQ(ierr); 3506 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew);CHKERRQ(ierr); 3507 ierr = MatSeqAIJRestoreArrayRead(B,&oa);CHKERRQ(ierr); 3508 bnew = (Mat_SeqAIJ*)Bnew->data; 3509 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3510 maij->B = Bnew; 3511 3512 PetscCheckFalse(B->rmap->N != Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N); 3513 3514 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3515 b->free_a = PETSC_FALSE; 3516 b->free_ij = PETSC_FALSE; 3517 ierr = MatDestroy(&B);CHKERRQ(ierr); 3518 3519 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3520 bnew->free_a = PETSC_TRUE; 3521 bnew->free_ij = PETSC_TRUE; 3522 3523 /* condense columns of maij->B */ 3524 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3525 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3526 
ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3527 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3528 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3529 PetscFunctionReturn(0); 3530 } 3531 3532 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3533 3534 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3535 { 3536 PetscErrorCode ierr; 3537 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3538 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3539 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3540 Mat M,Msub,B=a->B; 3541 MatScalar *aa; 3542 Mat_SeqAIJ *aij; 3543 PetscInt *garray = a->garray,*colsub,Ncols; 3544 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3545 IS iscol_sub,iscmap; 3546 const PetscInt *is_idx,*cmap; 3547 PetscBool allcolumns=PETSC_FALSE; 3548 MPI_Comm comm; 3549 3550 PetscFunctionBegin; 3551 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3552 if (call == MAT_REUSE_MATRIX) { 3553 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3554 PetscCheckFalse(!iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3555 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3556 3557 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3558 PetscCheckFalse(!iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3559 3560 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3561 PetscCheckFalse(!Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3562 3563 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3564 3565 } else { /* call == MAT_INITIAL_MATRIX) */ 3566 PetscBool flg; 3567 3568 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3569 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3570 3571 /* (1) iscol -> nonscalable iscol_local */ 3572 /* Check for special case: each processor gets entire matrix columns */ 3573 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3574 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3575 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 3576 if (allcolumns) { 3577 iscol_sub = iscol_local; 3578 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3579 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3580 3581 } else { 3582 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3583 PetscInt *idx,*cmap1,k; 3584 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3585 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3586 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3587 count = 0; 3588 k = 0; 3589 for (i=0; i<Ncols; i++) { 3590 j = is_idx[i]; 3591 if (j >= cstart && j < cend) { 3592 /* diagonal part of mat */ 3593 idx[count] = j; 3594 cmap1[count++] = i; /* column index in submat */ 3595 } else if (Bn) { 3596 /* off-diagonal part of mat */ 3597 if (j == garray[k]) { 3598 idx[count] = j; 3599 cmap1[count++] = i; /* column index in submat */ 3600 } else if (j > garray[k]) { 3601 while (j > garray[k] && k < Bn-1) k++; 3602 if (j == garray[k]) { 3603 idx[count] = j; 3604 cmap1[count++] = i; /* column index in submat */ 3605 } 3606 } 3607 } 3608 } 3609 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3610 3611 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3612 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3613 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3614 3615 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3616 } 3617 3618 /* (3) Create sequential Msub */ 3619 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3620 } 3621 3622 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3623 aij = (Mat_SeqAIJ*)(Msub)->data; 3624 ii = aij->i; 3625 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3626 3627 /* 3628 m - number of local rows 3629 Ncols - number of columns (same on all processors) 3630 rstart - first row in new global matrix generated 3631 */ 3632 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3633 3634 if (call == MAT_INITIAL_MATRIX) { 3635 /* (4) Create parallel newmat */ 3636 PetscMPIInt rank,size; 3637 PetscInt csize; 3638 3639 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 3640 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 3641 3642 /* 3643 Determine the number of non-zeros in the diagonal and off-diagonal 3644 portions of the matrix in order to do correct preallocation 3645 */ 3646 3647 /* first get start and end of "diagonal" columns */ 3648 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3649 if (csize == PETSC_DECIDE) { 3650 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3651 if (mglobal == Ncols) { /* square matrix */ 3652 nlocal = m; 3653 } else { 3654 nlocal = Ncols/size + ((Ncols % size) > rank); 3655 } 3656 } else { 3657 nlocal = csize; 3658 } 3659 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3660 rstart = rend - nlocal; 3661 PetscCheckFalse(rank == size - 1 && rend != Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols); 3662 3663 /* next, compute all the lengths */ 3664 jj = aij->j; 3665 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3666 olens = dlens + m; 3667 for (i=0; i<m; i++) { 3668 jend = ii[i+1] - ii[i]; 3669 olen = 0; 3670 dlen = 0; 3671 for (j=0; j<jend; j++) { 3672 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3673 else dlen++; 3674 jj++; 3675 } 3676 olens[i] = olen; 3677 dlens[i] = dlen; 3678 } 3679 3680 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3681 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3682 3683 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3684 ierr = 
MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 3685 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3686 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3687 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3688 ierr = PetscFree(dlens);CHKERRQ(ierr); 3689 3690 } else { /* call == MAT_REUSE_MATRIX */ 3691 M = *newmat; 3692 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3693 PetscCheckFalse(i != m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3694 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3695 /* 3696 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3697 rather than the slower MatSetValues(). 3698 */ 3699 M->was_assembled = PETSC_TRUE; 3700 M->assembled = PETSC_FALSE; 3701 } 3702 3703 /* (5) Set values of Msub to *newmat */ 3704 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3705 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3706 3707 jj = aij->j; 3708 ierr = MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr); 3709 for (i=0; i<m; i++) { 3710 row = rstart + i; 3711 nz = ii[i+1] - ii[i]; 3712 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3713 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3714 jj += nz; aa += nz; 3715 } 3716 ierr = MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr); 3717 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3718 3719 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3720 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3721 3722 ierr = PetscFree(colsub);CHKERRQ(ierr); 3723 3724 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3725 if (call == MAT_INITIAL_MATRIX) { 3726 *newmat = M; 3727 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3728 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3729 3730 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3731 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3732 3733 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3734 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3735 3736 if (iscol_local) { 3737 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3738 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3739 } 3740 } 3741 PetscFunctionReturn(0); 3742 } 3743 3744 /* 3745 Not great since it makes two copies of the submatrix, first an SeqAIJ 3746 in local and then by concatenating the local matrices the end result. 3747 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3748 3749 Note: This requires a sequential iscol with all indices. 
3750 */ 3751 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3752 { 3753 PetscErrorCode ierr; 3754 PetscMPIInt rank,size; 3755 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3756 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3757 Mat M,Mreuse; 3758 MatScalar *aa,*vwork; 3759 MPI_Comm comm; 3760 Mat_SeqAIJ *aij; 3761 PetscBool colflag,allcolumns=PETSC_FALSE; 3762 3763 PetscFunctionBegin; 3764 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3765 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 3766 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 3767 3768 /* Check for special case: each processor gets entire matrix columns */ 3769 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3770 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3771 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3772 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 3773 3774 if (call == MAT_REUSE_MATRIX) { 3775 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3776 PetscCheckFalse(!Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3777 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3778 } else { 3779 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3780 } 3781 3782 /* 3783 m - number of local rows 3784 n - number of columns (same on all processors) 3785 rstart - first row in new global matrix generated 3786 */ 3787 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3788 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3789 if (call == MAT_INITIAL_MATRIX) { 3790 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3791 ii = aij->i; 3792 jj = aij->j; 3793 3794 /* 3795 Determine the number of non-zeros in the diagonal and off-diagonal 3796 portions of the matrix in order to do correct preallocation 3797 */ 3798 3799 /* first get start and end of "diagonal" columns */ 3800 if (csize == PETSC_DECIDE) { 3801 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3802 if (mglobal == n) { /* square matrix */ 3803 nlocal = m; 3804 } else { 3805 nlocal = n/size + ((n % size) > rank); 3806 } 3807 } else { 3808 nlocal = csize; 3809 } 3810 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3811 rstart = rend - nlocal; 3812 PetscCheckFalse(rank == size - 1 && rend != n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n); 3813 3814 /* next, compute all the lengths */ 3815 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3816 olens = dlens + m; 3817 for (i=0; i<m; i++) { 3818 jend = ii[i+1] - ii[i]; 3819 olen = 0; 3820 dlen = 0; 3821 for (j=0; j<jend; j++) { 3822 if (*jj < rstart || *jj >= rend) olen++; 3823 else dlen++; 3824 jj++; 3825 } 3826 olens[i] = olen; 3827 dlens[i] = dlen; 3828 } 3829 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3830 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3831 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3832 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3833 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3834 ierr = PetscFree(dlens);CHKERRQ(ierr); 3835 } else { 3836 PetscInt ml,nl; 3837 3838 M = *newmat; 3839 ierr = 
MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3840 PetscCheckFalse(ml != m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3841 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3842 /* 3843 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3844 rather than the slower MatSetValues(). 3845 */ 3846 M->was_assembled = PETSC_TRUE; 3847 M->assembled = PETSC_FALSE; 3848 } 3849 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3850 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3851 ii = aij->i; 3852 jj = aij->j; 3853 3854 /* trigger copy to CPU if needed */ 3855 ierr = MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr); 3856 for (i=0; i<m; i++) { 3857 row = rstart + i; 3858 nz = ii[i+1] - ii[i]; 3859 cwork = jj; jj += nz; 3860 vwork = aa; aa += nz; 3861 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3862 } 3863 ierr = MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr); 3864 3865 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3866 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3867 *newmat = M; 3868 3869 /* save submatrix used in processor for next request */ 3870 if (call == MAT_INITIAL_MATRIX) { 3871 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3872 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3873 } 3874 PetscFunctionReturn(0); 3875 } 3876 3877 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3878 { 3879 PetscInt m,cstart, cend,j,nnz,i,d; 3880 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3881 const PetscInt *JJ; 3882 PetscErrorCode ierr; 3883 PetscBool nooffprocentries; 3884 3885 PetscFunctionBegin; 3886 PetscCheckFalse(Ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %" PetscInt_FMT,Ii[0]); 3887 3888 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3889 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3890 m = B->rmap->n; 3891 cstart = B->cmap->rstart; 3892 cend = B->cmap->rend; 3893 rstart = B->rmap->rstart; 3894 3895 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3896 3897 if (PetscDefined(USE_DEBUG)) { 3898 for (i=0; i<m; i++) { 3899 nnz = Ii[i+1]- Ii[i]; 3900 JJ = J + Ii[i]; 3901 PetscCheckFalse(nnz < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns",i,nnz); 3902 PetscCheckFalse(nnz && (JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]); 3903 PetscCheckFalse(nnz && (JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N); 3904 } 3905 } 3906 3907 for (i=0; i<m; i++) { 3908 nnz = Ii[i+1]- Ii[i]; 3909 JJ = J + Ii[i]; 3910 nnz_max = PetscMax(nnz_max,nnz); 3911 d = 0; 3912 for (j=0; j<nnz; j++) { 3913 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3914 } 3915 d_nnz[i] = d; 3916 o_nnz[i] = nnz - d; 3917 } 3918 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3919 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3920 3921 for (i=0; i<m; i++) { 3922 ii = i + rstart; 3923 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? 
v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 3924 } 3925 nooffprocentries = B->nooffprocentries; 3926 B->nooffprocentries = PETSC_TRUE; 3927 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3928 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3929 B->nooffprocentries = nooffprocentries; 3930 3931 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3932 PetscFunctionReturn(0); 3933 } 3934 3935 /*@ 3936 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3937 (the default parallel PETSc format). 3938 3939 Collective 3940 3941 Input Parameters: 3942 + B - the matrix 3943 . i - the indices into j for the start of each local row (starts with zero) 3944 . j - the column indices for each local row (starts with zero) 3945 - v - optional values in the matrix 3946 3947 Level: developer 3948 3949 Notes: 3950 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3951 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3952 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3953 3954 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3955 3956 The format which is used for the sparse matrix input, is equivalent to a 3957 row-major ordering.. i.e for the following matrix, the input data expected is 3958 as shown 3959 3960 $ 1 0 0 3961 $ 2 0 3 P0 3962 $ ------- 3963 $ 4 5 6 P1 3964 $ 3965 $ Process0 [P0]: rows_owned=[0,1] 3966 $ i = {0,1,3} [size = nrow+1 = 2+1] 3967 $ j = {0,0,2} [size = 3] 3968 $ v = {1,2,3} [size = 3] 3969 $ 3970 $ Process1 [P1]: rows_owned=[2] 3971 $ i = {0,3} [size = nrow+1 = 1+1] 3972 $ j = {0,1,2} [size = 3] 3973 $ v = {4,5,6} [size = 3] 3974 3975 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3976 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3977 @*/ 3978 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3979 { 3980 PetscErrorCode ierr; 3981 3982 PetscFunctionBegin; 3983 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3984 PetscFunctionReturn(0); 3985 } 3986 3987 /*@C 3988 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3989 (the default parallel PETSc format). For good matrix assembly performance 3990 the user should preallocate the matrix storage by setting the parameters 3991 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3992 performance can be increased by more than a factor of 50. 3993 3994 Collective 3995 3996 Input Parameters: 3997 + B - the matrix 3998 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3999 (same value is used for all local rows) 4000 . d_nnz - array containing the number of nonzeros in the various rows of the 4001 DIAGONAL portion of the local submatrix (possibly different for each row) 4002 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 4003 The size of this array is equal to the number of local rows, i.e 'm'. 4004 For matrices that will be factored, you must leave room for (and set) 4005 the diagonal entry even if it is zero. 4006 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
          OFF-DIAGONAL portion of the local submatrix (possibly different for
          each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
          structure. The size of this array is equal to the number
          of local rows, i.e 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)) is fully compatible with standard Fortran 77
   storage. The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to. This is an mxn matrix. In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Here the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices.
For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the matrix data in standard
   CSR format for the local rows.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and the i indices are indices corresponding to the local j array.

   The format which is used for the sparse matrix input is equivalent to a
   row-major ordering,
i.e., for the following matrix, the input data expected is
   as shown below.

   Once you have created the matrix, you can update it with new numerical values using MatUpdateMPIAIJWithArrays().

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscCheckFalse(i && i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  PetscCheckFalse(m < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the matrix data in standard
   CSR format for the local rows. Only the numerical values are updated; the other arrays must be identical.

   Collective

   Input Parameters:
+  mat - the matrix
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
.
J - column indices 4212 - v - matrix values 4213 4214 Level: intermediate 4215 4216 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4217 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4218 @*/ 4219 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4220 { 4221 PetscErrorCode ierr; 4222 PetscInt cstart,nnz,i,j; 4223 PetscInt *ld; 4224 PetscBool nooffprocentries; 4225 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4226 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data; 4227 PetscScalar *ad,*ao; 4228 const PetscInt *Adi = Ad->i; 4229 PetscInt ldi,Iii,md; 4230 4231 PetscFunctionBegin; 4232 PetscCheckFalse(Ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4233 PetscCheckFalse(m < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4234 PetscCheckFalse(m != mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4235 PetscCheckFalse(n != mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4236 4237 ierr = MatSeqAIJGetArrayWrite(Aij->A,&ad);CHKERRQ(ierr); 4238 ierr = MatSeqAIJGetArrayWrite(Aij->B,&ao);CHKERRQ(ierr); 4239 cstart = mat->cmap->rstart; 4240 if (!Aij->ld) { 4241 /* count number of entries below block diagonal */ 4242 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 4243 Aij->ld = ld; 4244 for (i=0; i<m; i++) { 4245 nnz = Ii[i+1]- Ii[i]; 4246 j = 0; 4247 while (J[j] < cstart && j < nnz) {j++;} 4248 J += nnz; 4249 ld[i] = j; 4250 } 4251 } else { 4252 ld = Aij->ld; 4253 } 4254 4255 for (i=0; i<m; i++) { 4256 nnz = Ii[i+1]- Ii[i]; 4257 Iii = Ii[i]; 4258 ldi = ld[i]; 4259 md = Adi[i+1]-Adi[i]; 4260 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr); 4261 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr); 4262 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr); 4263 ad += md; 4264 ao += nnz - md; 4265 } 4266 nooffprocentries = mat->nooffprocentries; 4267 mat->nooffprocentries = PETSC_TRUE; 4268 ierr = MatSeqAIJRestoreArrayWrite(Aij->A,&ad);CHKERRQ(ierr); 4269 ierr = MatSeqAIJRestoreArrayWrite(Aij->B,&ao);CHKERRQ(ierr); 4270 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr); 4271 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr); 4272 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 4273 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4274 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4275 mat->nooffprocentries = nooffprocentries; 4276 PetscFunctionReturn(0); 4277 } 4278 4279 /*@C 4280 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4281 (the default parallel PETSc format). For good matrix assembly performance 4282 the user should preallocate the matrix storage by setting the parameters 4283 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4284 performance can be increased by more than a factor of 50. 4285 4286 Collective 4287 4288 Input Parameters: 4289 + comm - MPI communicator 4290 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4291 This value should be the same as the local size used in creating the 4292 y vector for the matrix-vector product y = Ax. 4293 . 
n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The m, n, M, N parameters specify the size of the matrix, and its partitioning across
   processors, while the d_nz, d_nnz, o_nz, o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0, m1, m2, ... are the values of the input parameter 'm' on each process; that is, each
   process stores values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0, n1, n2, ... are the values of the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m, n
   owned by that processor; that is, the diagonal block on
   process 0 is [m0 x n0], the diagonal block on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix, [m x (N-n)],
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
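   As a minimal sketch (the array names diag_counts and offdiag_counts are illustrative,
   filled by the application with one count per local row), a process owning m rows and
   n columns might create the matrix with per-row preallocation as
.vb
   PetscInt *diag_counts,*offdiag_counts;
   Mat       A;

   MatCreateAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,diag_counts,0,offdiag_counts,&A);
.ve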

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism
.vb
   MatCreate(...,&A);
   MatSetType(A,MATMPIAIJ);
   MatSetSizes(A, m,n,M,N);
   MatMPIAIJSetPreallocation(A,...);
.ve

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices.
        See viewer types in manual of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter to be viewed as a matrix.
        Entry (i,j) is the size of the message (in bytes) rank i sends to rank j in one MatMult() call.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let us assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
   row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz, o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2; i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz, o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@C
     MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix

   Not collective

   Input Parameter:
.  A - The MPIAIJ matrix

   Output Parameters:
+  Ad - The local diagonal block as a SeqAIJ matrix
.  Ao - The local off-diagonal block as a SeqAIJ matrix
-  colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix

   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
   in Ad are in [0, Nc), where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
   local column numbers to global column numbers in the original matrix.
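   A minimal sketch of using colmap to recover the global column indices of the off-diagonal
   block (variable names are illustrative, error checking omitted):
.vb
   Mat             Ad,Ao;
   const PetscInt  *colmap,*cols;
   PetscInt        i,j,ncols,rstart,rend,gcol;

   MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
   MatGetOwnershipRange(A,&rstart,&rend);
   for (i=0; i<rend-rstart; i++) {
     MatGetRow(Ao,i,&ncols,&cols,NULL);
     for (j=0; j<ncols; j++) gcol = colmap[cols[j]];
     MatRestoreRow(Ao,i,&ncols,&cols,NULL);
   }
.ve
   Here gcol is the global column index of the j-th stored entry in local row i of Ao, i.e.,
   the entry lies in global row rstart+i and global column colmap[cols[j]].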
4491 4492 Level: intermediate 4493 4494 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ 4495 @*/ 4496 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4497 { 4498 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4499 PetscBool flg; 4500 PetscErrorCode ierr; 4501 4502 PetscFunctionBegin; 4503 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4504 PetscCheckFalse(!flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4505 if (Ad) *Ad = a->A; 4506 if (Ao) *Ao = a->B; 4507 if (colmap) *colmap = a->garray; 4508 PetscFunctionReturn(0); 4509 } 4510 4511 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4512 { 4513 PetscErrorCode ierr; 4514 PetscInt m,N,i,rstart,nnz,Ii; 4515 PetscInt *indx; 4516 PetscScalar *values; 4517 MatType rootType; 4518 4519 PetscFunctionBegin; 4520 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4521 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4522 PetscInt *dnz,*onz,sum,bs,cbs; 4523 4524 if (n == PETSC_DECIDE) { 4525 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4526 } 4527 /* Check sum(n) = N */ 4528 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 4529 PetscCheckFalse(sum != N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N); 4530 4531 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 4532 rstart -= m; 4533 4534 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4535 for (i=0; i<m; i++) { 4536 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4537 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4538 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4539 } 4540 4541 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4542 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4543 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4544 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4545 ierr = MatGetRootType_Private(inmat,&rootType);CHKERRQ(ierr); 4546 ierr = MatSetType(*outmat,rootType);CHKERRQ(ierr); 4547 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4548 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4549 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4550 ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 4551 } 4552 4553 /* numeric phase */ 4554 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4555 for (i=0; i<m; i++) { 4556 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4557 Ii = i + rstart; 4558 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4559 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4560 } 4561 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4562 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4563 PetscFunctionReturn(0); 4564 } 4565 4566 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4567 { 4568 PetscErrorCode ierr; 4569 PetscMPIInt rank; 4570 PetscInt m,N,i,rstart,nnz; 4571 size_t len; 4572 const PetscInt *indx; 4573 PetscViewer out; 4574 char *name; 4575 Mat B; 4576 const PetscScalar *values; 4577 4578 PetscFunctionBegin; 4579 ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr); 4580 ierr = 
MatGetSize(A,NULL,&N);CHKERRQ(ierr); 4581 /* Should this be the type of the diagonal block of A? */ 4582 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4583 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4584 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4585 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4586 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4587 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 4588 for (i=0; i<m; i++) { 4589 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4590 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4591 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4592 } 4593 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4594 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4595 4596 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr); 4597 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4598 ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr); 4599 ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr); 4600 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4601 ierr = PetscFree(name);CHKERRQ(ierr); 4602 ierr = MatView(B,out);CHKERRQ(ierr); 4603 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4604 ierr = MatDestroy(&B);CHKERRQ(ierr); 4605 PetscFunctionReturn(0); 4606 } 4607 4608 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4609 { 4610 PetscErrorCode ierr; 4611 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4612 4613 PetscFunctionBegin; 4614 if (!merge) PetscFunctionReturn(0); 4615 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4616 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4617 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4618 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4619 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4620 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4621 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4622 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4623 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4624 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4625 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4626 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4627 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4628 ierr = PetscFree(merge);CHKERRQ(ierr); 4629 PetscFunctionReturn(0); 4630 } 4631 4632 #include <../src/mat/utils/freespace.h> 4633 #include <petscbt.h> 4634 4635 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4636 { 4637 PetscErrorCode ierr; 4638 MPI_Comm comm; 4639 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4640 PetscMPIInt size,rank,taga,*len_s; 4641 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4642 PetscInt proc,m; 4643 PetscInt **buf_ri,**buf_rj; 4644 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4645 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4646 MPI_Request *s_waits,*r_waits; 4647 MPI_Status *status; 4648 const MatScalar *aa,*a_a; 4649 MatScalar **abuf_r,*ba_i; 4650 Mat_Merge_SeqsToMPI *merge; 4651 PetscContainer container; 4652 4653 PetscFunctionBegin; 4654 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4655 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4656 4657 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4658 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 4659 4660 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4661 
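  /* the container composed as "MatMergeSeqsToMPI" by MatCreateMPIAIJSumSeqAIJSymbolic() carries the
     Mat_Merge_SeqsToMPI work structure that this numeric phase reuses */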
PetscCheckFalse(!container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4662 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4663 ierr = MatSeqAIJGetArrayRead(seqmat,&a_a);CHKERRQ(ierr); 4664 aa = a_a; 4665 4666 bi = merge->bi; 4667 bj = merge->bj; 4668 buf_ri = merge->buf_ri; 4669 buf_rj = merge->buf_rj; 4670 4671 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4672 owners = merge->rowmap->range; 4673 len_s = merge->len_s; 4674 4675 /* send and recv matrix values */ 4676 /*-----------------------------*/ 4677 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4678 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4679 4680 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4681 for (proc=0,k=0; proc<size; proc++) { 4682 if (!len_s[proc]) continue; 4683 i = owners[proc]; 4684 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRMPI(ierr); 4685 k++; 4686 } 4687 4688 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRMPI(ierr);} 4689 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRMPI(ierr);} 4690 ierr = PetscFree(status);CHKERRQ(ierr); 4691 4692 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4693 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4694 4695 /* insert mat values of mpimat */ 4696 /*----------------------------*/ 4697 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4698 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4699 4700 for (k=0; k<merge->nrecv; k++) { 4701 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4702 nrows = *(buf_ri_k[k]); 4703 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4704 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4705 } 4706 4707 /* set values of ba */ 4708 m = merge->rowmap->n; 4709 for (i=0; i<m; i++) { 4710 arow = owners[rank] + i; 4711 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4712 bnzi = bi[i+1] - bi[i]; 4713 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4714 4715 /* add local non-zero vals of this proc's seqmat into ba */ 4716 anzi = ai[arow+1] - ai[arow]; 4717 aj = a->j + ai[arow]; 4718 aa = a_a + ai[arow]; 4719 nextaj = 0; 4720 for (j=0; nextaj<anzi; j++) { 4721 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4722 ba_i[j] += aa[nextaj++]; 4723 } 4724 } 4725 4726 /* add received vals into ba */ 4727 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4728 /* i-th row */ 4729 if (i == *nextrow[k]) { 4730 anzi = *(nextai[k]+1) - *nextai[k]; 4731 aj = buf_rj[k] + *(nextai[k]); 4732 aa = abuf_r[k] + *(nextai[k]); 4733 nextaj = 0; 4734 for (j=0; nextaj<anzi; j++) { 4735 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4736 ba_i[j] += aa[nextaj++]; 4737 } 4738 } 4739 nextrow[k]++; nextai[k]++; 4740 } 4741 } 4742 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4743 } 4744 ierr = MatSeqAIJRestoreArrayRead(seqmat,&a_a);CHKERRQ(ierr); 4745 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4746 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4747 4748 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4749 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4750 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4751 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4752 ierr = 
PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4753 PetscFunctionReturn(0); 4754 } 4755 4756 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4757 { 4758 PetscErrorCode ierr; 4759 Mat B_mpi; 4760 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4761 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4762 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4763 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4764 PetscInt len,proc,*dnz,*onz,bs,cbs; 4765 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4766 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4767 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4768 MPI_Status *status; 4769 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4770 PetscBT lnkbt; 4771 Mat_Merge_SeqsToMPI *merge; 4772 PetscContainer container; 4773 4774 PetscFunctionBegin; 4775 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4776 4777 /* make sure it is a PETSc comm */ 4778 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4779 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4780 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 4781 4782 ierr = PetscNew(&merge);CHKERRQ(ierr); 4783 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4784 4785 /* determine row ownership */ 4786 /*---------------------------------------------------------*/ 4787 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4788 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4789 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4790 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4791 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4792 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4793 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4794 4795 m = merge->rowmap->n; 4796 owners = merge->rowmap->range; 4797 4798 /* determine the number of messages to send, their lengths */ 4799 /*---------------------------------------------------------*/ 4800 len_s = merge->len_s; 4801 4802 len = 0; /* length of buf_si[] */ 4803 merge->nsend = 0; 4804 for (proc=0; proc<size; proc++) { 4805 len_si[proc] = 0; 4806 if (proc == rank) { 4807 len_s[proc] = 0; 4808 } else { 4809 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4810 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4811 } 4812 if (len_s[proc]) { 4813 merge->nsend++; 4814 nrows = 0; 4815 for (i=owners[proc]; i<owners[proc+1]; i++) { 4816 if (ai[i+1] > ai[i]) nrows++; 4817 } 4818 len_si[proc] = 2*(nrows+1); 4819 len += len_si[proc]; 4820 } 4821 } 4822 4823 /* determine the number and length of messages to receive for ij-structure */ 4824 /*-------------------------------------------------------------------------*/ 4825 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4826 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4827 4828 /* post the Irecv of j-structure */ 4829 /*-------------------------------*/ 4830 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4831 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4832 4833 /* post the Isend of j-structure */ 4834 /*--------------------------------*/ 4835 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4836 4837 for (proc=0, k=0; proc<size; proc++) { 4838 
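    /* send the column indices (a->j) of the seqmat rows that fall in the row range owned by 'proc';
       ranks with len_s[proc] == 0 receive nothing from this rank */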
if (!len_s[proc]) continue; 4839 i = owners[proc]; 4840 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRMPI(ierr); 4841 k++; 4842 } 4843 4844 /* receives and sends of j-structure are complete */ 4845 /*------------------------------------------------*/ 4846 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRMPI(ierr);} 4847 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRMPI(ierr);} 4848 4849 /* send and recv i-structure */ 4850 /*---------------------------*/ 4851 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4852 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4853 4854 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4855 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4856 for (proc=0,k=0; proc<size; proc++) { 4857 if (!len_s[proc]) continue; 4858 /* form outgoing message for i-structure: 4859 buf_si[0]: nrows to be sent 4860 [1:nrows]: row index (global) 4861 [nrows+1:2*nrows+1]: i-structure index 4862 */ 4863 /*-------------------------------------------*/ 4864 nrows = len_si[proc]/2 - 1; 4865 buf_si_i = buf_si + nrows+1; 4866 buf_si[0] = nrows; 4867 buf_si_i[0] = 0; 4868 nrows = 0; 4869 for (i=owners[proc]; i<owners[proc+1]; i++) { 4870 anzi = ai[i+1] - ai[i]; 4871 if (anzi) { 4872 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4873 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4874 nrows++; 4875 } 4876 } 4877 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRMPI(ierr); 4878 k++; 4879 buf_si += len_si[proc]; 4880 } 4881 4882 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRMPI(ierr);} 4883 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRMPI(ierr);} 4884 4885 ierr = PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4886 for (i=0; i<merge->nrecv; i++) { 4887 ierr = PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4888 } 4889 4890 ierr = PetscFree(len_si);CHKERRQ(ierr); 4891 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4892 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4893 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4894 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4895 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4896 ierr = PetscFree(status);CHKERRQ(ierr); 4897 4898 /* compute a local seq matrix in each processor */ 4899 /*----------------------------------------------*/ 4900 /* allocate bi array and free space for accumulating nonzero column info */ 4901 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4902 bi[0] = 0; 4903 4904 /* create and initialize a linked list */ 4905 nlnk = N+1; 4906 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4907 4908 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4909 len = ai[owners[rank+1]] - ai[owners[rank]]; 4910 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4911 4912 current_space = free_space; 4913 4914 /* determine symbolic info for each local row */ 4915 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4916 4917 for (k=0; k<merge->nrecv; k++) { 4918 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4919 nrows = *buf_ri_k[k]; 4920 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4921 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next 
i-structure of k-th recved i-structure */ 4922 } 4923 4924 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4925 len = 0; 4926 for (i=0; i<m; i++) { 4927 bnzi = 0; 4928 /* add local non-zero cols of this proc's seqmat into lnk */ 4929 arow = owners[rank] + i; 4930 anzi = ai[arow+1] - ai[arow]; 4931 aj = a->j + ai[arow]; 4932 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4933 bnzi += nlnk; 4934 /* add received col data into lnk */ 4935 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4936 if (i == *nextrow[k]) { /* i-th row */ 4937 anzi = *(nextai[k]+1) - *nextai[k]; 4938 aj = buf_rj[k] + *nextai[k]; 4939 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4940 bnzi += nlnk; 4941 nextrow[k]++; nextai[k]++; 4942 } 4943 } 4944 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4945 4946 /* if free space is not available, make more free space */ 4947 if (current_space->local_remaining<bnzi) { 4948 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4949 nspacedouble++; 4950 } 4951 /* copy data into free space, then initialize lnk */ 4952 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4953 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4954 4955 current_space->array += bnzi; 4956 current_space->local_used += bnzi; 4957 current_space->local_remaining -= bnzi; 4958 4959 bi[i+1] = bi[i] + bnzi; 4960 } 4961 4962 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4963 4964 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4965 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4966 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4967 4968 /* create symbolic parallel matrix B_mpi */ 4969 /*---------------------------------------*/ 4970 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4971 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4972 if (n==PETSC_DECIDE) { 4973 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4974 } else { 4975 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4976 } 4977 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4978 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4979 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4980 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4981 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4982 4983 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4984 B_mpi->assembled = PETSC_FALSE; 4985 merge->bi = bi; 4986 merge->bj = bj; 4987 merge->buf_ri = buf_ri; 4988 merge->buf_rj = buf_rj; 4989 merge->coi = NULL; 4990 merge->coj = NULL; 4991 merge->owners_co = NULL; 4992 4993 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4994 4995 /* attach the supporting struct to B_mpi for reuse */ 4996 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4997 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4998 ierr = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr); 4999 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 5000 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 5001 *mpimat = B_mpi; 5002 5003 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 5004 PetscFunctionReturn(0); 5005 } 5006 5007 /*@C 5008 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by 
adding sequential 5009 matrices from each processor 5010 5011 Collective 5012 5013 Input Parameters: 5014 + comm - the communicators the parallel matrix will live on 5015 . seqmat - the input sequential matrices 5016 . m - number of local rows (or PETSC_DECIDE) 5017 . n - number of local columns (or PETSC_DECIDE) 5018 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5019 5020 Output Parameter: 5021 . mpimat - the parallel matrix generated 5022 5023 Level: advanced 5024 5025 Notes: 5026 The dimensions of the sequential matrix in each processor MUST be the same. 5027 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5028 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 5029 @*/ 5030 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 5031 { 5032 PetscErrorCode ierr; 5033 PetscMPIInt size; 5034 5035 PetscFunctionBegin; 5036 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 5037 if (size == 1) { 5038 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5039 if (scall == MAT_INITIAL_MATRIX) { 5040 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 5041 } else { 5042 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5043 } 5044 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5045 PetscFunctionReturn(0); 5046 } 5047 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5048 if (scall == MAT_INITIAL_MATRIX) { 5049 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5050 } 5051 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5052 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5053 PetscFunctionReturn(0); 5054 } 5055 5056 /*@ 5057 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5058 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5059 with MatGetSize() 5060 5061 Not Collective 5062 5063 Input Parameters: 5064 + A - the matrix 5065 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5066 5067 Output Parameter: 5068 . A_loc - the local sequential matrix generated 5069 5070 Level: developer 5071 5072 Notes: 5073 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 5074 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 5075 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 5076 modify the values of the returned A_loc. 
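
   A minimal usage sketch (variable names are illustrative): create the local matrix once, then
   refresh its values after the parallel matrix is assembled again with the same nonzero pattern
.vb
   Mat A_loc;

   MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
   ....  A is assembled again with new numerical values  ....
   MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
   MatDestroy(&A_loc);
.ve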
5077 5078 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge() 5079 @*/ 5080 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5081 { 5082 PetscErrorCode ierr; 5083 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5084 Mat_SeqAIJ *mat,*a,*b; 5085 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5086 const PetscScalar *aa,*ba,*aav,*bav; 5087 PetscScalar *ca,*cam; 5088 PetscMPIInt size; 5089 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5090 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5091 PetscBool match; 5092 5093 PetscFunctionBegin; 5094 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5095 PetscCheckFalse(!match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5096 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 5097 if (size == 1) { 5098 if (scall == MAT_INITIAL_MATRIX) { 5099 ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr); 5100 *A_loc = mpimat->A; 5101 } else if (scall == MAT_REUSE_MATRIX) { 5102 ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5103 } 5104 PetscFunctionReturn(0); 5105 } 5106 5107 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5108 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5109 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5110 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5111 ierr = MatSeqAIJGetArrayRead(mpimat->A,&aav);CHKERRQ(ierr); 5112 ierr = MatSeqAIJGetArrayRead(mpimat->B,&bav);CHKERRQ(ierr); 5113 aa = aav; 5114 ba = bav; 5115 if (scall == MAT_INITIAL_MATRIX) { 5116 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5117 ci[0] = 0; 5118 for (i=0; i<am; i++) { 5119 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5120 } 5121 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5122 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5123 k = 0; 5124 for (i=0; i<am; i++) { 5125 ncols_o = bi[i+1] - bi[i]; 5126 ncols_d = ai[i+1] - ai[i]; 5127 /* off-diagonal portion of A */ 5128 for (jo=0; jo<ncols_o; jo++) { 5129 col = cmap[*bj]; 5130 if (col >= cstart) break; 5131 cj[k] = col; bj++; 5132 ca[k++] = *ba++; 5133 } 5134 /* diagonal portion of A */ 5135 for (j=0; j<ncols_d; j++) { 5136 cj[k] = cstart + *aj++; 5137 ca[k++] = *aa++; 5138 } 5139 /* off-diagonal portion of A */ 5140 for (j=jo; j<ncols_o; j++) { 5141 cj[k] = cmap[*bj++]; 5142 ca[k++] = *ba++; 5143 } 5144 } 5145 /* put together the new matrix */ 5146 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5147 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5148 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5149 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5150 mat->free_a = PETSC_TRUE; 5151 mat->free_ij = PETSC_TRUE; 5152 mat->nonew = 0; 5153 } else if (scall == MAT_REUSE_MATRIX) { 5154 mat =(Mat_SeqAIJ*)(*A_loc)->data; 5155 ci = mat->i; 5156 cj = mat->j; 5157 ierr = MatSeqAIJGetArrayWrite(*A_loc,&cam);CHKERRQ(ierr); 5158 for (i=0; i<am; i++) { 5159 /* off-diagonal portion of A */ 5160 ncols_o = bi[i+1] - bi[i]; 5161 for (jo=0; jo<ncols_o; jo++) { 5162 col = cmap[*bj]; 5163 if (col >= cstart) break; 5164 *cam++ = *ba++; bj++; 5165 } 5166 /* diagonal portion of A */ 5167 ncols_d = ai[i+1] - ai[i]; 5168 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5169 /* off-diagonal portion of A */ 5170 for (j=jo; j<ncols_o; j++) { 5171 *cam++ = *ba++; bj++; 5172 } 5173 } 5174 ierr = MatSeqAIJRestoreArrayWrite(*A_loc,&cam);CHKERRQ(ierr); 5175 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5176 ierr = MatSeqAIJRestoreArrayRead(mpimat->A,&aav);CHKERRQ(ierr); 5177 ierr = MatSeqAIJRestoreArrayRead(mpimat->B,&bav);CHKERRQ(ierr); 5178 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5179 PetscFunctionReturn(0); 5180 } 5181 5182 /*@ 5183 MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5184 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part 5185 5186 Not Collective 5187 5188 Input Parameters: 5189 + A - the matrix 5190 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5191 5192 Output Parameters: 5193 + glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5194 - A_loc - the local sequential matrix generated 5195 5196 Level: developer 5197 5198 Notes: 5199 This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering) 5200 5201 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed() 5202 5203 @*/ 5204 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc) 5205 { 5206 PetscErrorCode ierr; 5207 Mat Ao,Ad; 5208 const PetscInt *cmap; 5209 PetscMPIInt size; 5210 PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*); 5211 5212 PetscFunctionBegin; 5213 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap);CHKERRQ(ierr); 5214 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 5215 if (size == 1) { 5216 if (scall == MAT_INITIAL_MATRIX) { 5217 ierr = PetscObjectReference((PetscObject)Ad);CHKERRQ(ierr); 5218 *A_loc = Ad; 5219 } else if (scall == MAT_REUSE_MATRIX) { 5220 ierr = MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5221 } 5222 if (glob) { ierr = ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob);CHKERRQ(ierr); } 5223 PetscFunctionReturn(0); 5224 } 5225 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f);CHKERRQ(ierr); 5226 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5227 if (f) { 5228 ierr = (*f)(A,scall,glob,A_loc);CHKERRQ(ierr); 5229 } else { 5230 Mat_SeqAIJ *a = (Mat_SeqAIJ*)Ad->data; 5231 Mat_SeqAIJ *b = (Mat_SeqAIJ*)Ao->data; 5232 Mat_SeqAIJ *c; 5233 PetscInt *ai = a->i, *aj = a->j; 5234 PetscInt *bi = b->i, *bj = b->j; 5235 PetscInt *ci,*cj; 5236 const PetscScalar *aa,*ba; 5237 PetscScalar *ca; 5238 PetscInt i,j,am,dn,on; 5239 5240 ierr = 
MatGetLocalSize(Ad,&am,&dn);CHKERRQ(ierr); 5241 ierr = MatGetLocalSize(Ao,NULL,&on);CHKERRQ(ierr); 5242 ierr = MatSeqAIJGetArrayRead(Ad,&aa);CHKERRQ(ierr); 5243 ierr = MatSeqAIJGetArrayRead(Ao,&ba);CHKERRQ(ierr); 5244 if (scall == MAT_INITIAL_MATRIX) { 5245 PetscInt k; 5246 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5247 ierr = PetscMalloc1(ai[am]+bi[am],&cj);CHKERRQ(ierr); 5248 ierr = PetscMalloc1(ai[am]+bi[am],&ca);CHKERRQ(ierr); 5249 ci[0] = 0; 5250 for (i=0,k=0; i<am; i++) { 5251 const PetscInt ncols_o = bi[i+1] - bi[i]; 5252 const PetscInt ncols_d = ai[i+1] - ai[i]; 5253 ci[i+1] = ci[i] + ncols_o + ncols_d; 5254 /* diagonal portion of A */ 5255 for (j=0; j<ncols_d; j++,k++) { 5256 cj[k] = *aj++; 5257 ca[k] = *aa++; 5258 } 5259 /* off-diagonal portion of A */ 5260 for (j=0; j<ncols_o; j++,k++) { 5261 cj[k] = dn + *bj++; 5262 ca[k] = *ba++; 5263 } 5264 } 5265 /* put together the new matrix */ 5266 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc);CHKERRQ(ierr); 5267 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5268 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5269 c = (Mat_SeqAIJ*)(*A_loc)->data; 5270 c->free_a = PETSC_TRUE; 5271 c->free_ij = PETSC_TRUE; 5272 c->nonew = 0; 5273 ierr = MatSetType(*A_loc,((PetscObject)Ad)->type_name);CHKERRQ(ierr); 5274 } else if (scall == MAT_REUSE_MATRIX) { 5275 ierr = MatSeqAIJGetArrayWrite(*A_loc,&ca);CHKERRQ(ierr); 5276 for (i=0; i<am; i++) { 5277 const PetscInt ncols_d = ai[i+1] - ai[i]; 5278 const PetscInt ncols_o = bi[i+1] - bi[i]; 5279 /* diagonal portion of A */ 5280 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 5281 /* off-diagonal portion of A */ 5282 for (j=0; j<ncols_o; j++) *ca++ = *ba++; 5283 } 5284 ierr = MatSeqAIJRestoreArrayWrite(*A_loc,&ca);CHKERRQ(ierr); 5285 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5286 ierr = MatSeqAIJRestoreArrayRead(Ad,&aa);CHKERRQ(ierr); 5287 ierr = MatSeqAIJRestoreArrayRead(Ao,&aa);CHKERRQ(ierr); 5288 if (glob) { 5289 PetscInt cst, *gidx; 5290 5291 ierr = MatGetOwnershipRangeColumn(A,&cst,NULL);CHKERRQ(ierr); 5292 ierr = PetscMalloc1(dn+on,&gidx);CHKERRQ(ierr); 5293 for (i=0; i<dn; i++) gidx[i] = cst + i; 5294 for (i=0; i<on; i++) gidx[i+dn] = cmap[i]; 5295 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob);CHKERRQ(ierr); 5296 } 5297 } 5298 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5299 PetscFunctionReturn(0); 5300 } 5301 5302 /*@C 5303 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5304 5305 Not Collective 5306 5307 Input Parameters: 5308 + A - the matrix 5309 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5310 - row, col - index sets of rows and columns to extract (or NULL) 5311 5312 Output Parameter: 5313 . 
A_loc - the local sequential matrix generated 5314 5315 Level: developer 5316 5317 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5318 5319 @*/ 5320 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5321 { 5322 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5323 PetscErrorCode ierr; 5324 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5325 IS isrowa,iscola; 5326 Mat *aloc; 5327 PetscBool match; 5328 5329 PetscFunctionBegin; 5330 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5331 PetscCheckFalse(!match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5332 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5333 if (!row) { 5334 start = A->rmap->rstart; end = A->rmap->rend; 5335 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5336 } else { 5337 isrowa = *row; 5338 } 5339 if (!col) { 5340 start = A->cmap->rstart; 5341 cmap = a->garray; 5342 nzA = a->A->cmap->n; 5343 nzB = a->B->cmap->n; 5344 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5345 ncols = 0; 5346 for (i=0; i<nzB; i++) { 5347 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5348 else break; 5349 } 5350 imark = i; 5351 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5352 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5353 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5354 } else { 5355 iscola = *col; 5356 } 5357 if (scall != MAT_INITIAL_MATRIX) { 5358 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5359 aloc[0] = *A_loc; 5360 } 5361 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5362 if (!col) { /* attach global id of condensed columns */ 5363 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5364 } 5365 *A_loc = aloc[0]; 5366 ierr = PetscFree(aloc);CHKERRQ(ierr); 5367 if (!row) { 5368 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5369 } 5370 if (!col) { 5371 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5372 } 5373 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5374 PetscFunctionReturn(0); 5375 } 5376 5377 /* 5378 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5379 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5380 * on a global size. 
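 * Communication is organized with two PetscSF objects, one for the diagonal block of P and one for the
 * off-diagonal block; they are composed on P_oth (as "diagsf" and "offdiagsf") so that later calls can reuse them.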
5381 * */ 5382 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5383 { 5384 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5385 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5386 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5387 PetscMPIInt owner; 5388 PetscSFNode *iremote,*oiremote; 5389 const PetscInt *lrowindices; 5390 PetscErrorCode ierr; 5391 PetscSF sf,osf; 5392 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5393 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5394 MPI_Comm comm; 5395 ISLocalToGlobalMapping mapping; 5396 const PetscScalar *pd_a,*po_a; 5397 5398 PetscFunctionBegin; 5399 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr); 5400 /* plocalsize is the number of roots 5401 * nrows is the number of leaves 5402 * */ 5403 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr); 5404 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr); 5405 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr); 5406 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr); 5407 for (i=0;i<nrows;i++) { 5408 /* Find a remote index and an owner for a row 5409 * The row could be local or remote 5410 * */ 5411 owner = 0; 5412 lidx = 0; 5413 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr); 5414 iremote[i].index = lidx; 5415 iremote[i].rank = owner; 5416 } 5417 /* Create SF to communicate how many nonzero columns for each row */ 5418 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5419 /* SF will figure out the number of nonzero colunms for each row, and their 5420 * offsets 5421 * */ 5422 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5423 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5424 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5425 5426 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); 5427 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); 5428 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); 5429 roffsets[0] = 0; 5430 roffsets[1] = 0; 5431 for (i=0;i<plocalsize;i++) { 5432 /* diag */ 5433 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5434 /* off diag */ 5435 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5436 /* compute offsets so that we relative location for each row */ 5437 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5438 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5439 } 5440 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr); 5441 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr); 5442 /* 'r' means root, and 'l' means leaf */ 5443 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr); 5444 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr); 5445 ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr); 5446 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr); 5447 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5448 ierr = PetscFree(roffsets);CHKERRQ(ierr); 5449 ierr = PetscFree(nrcols);CHKERRQ(ierr); 5450 dntotalcols = 0; 5451 ontotalcols = 0; 5452 ncol = 0; 5453 for (i=0;i<nrows;i++) { 5454 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5455 ncol = PetscMax(pnnz[i],ncol); 5456 /* diag */ 5457 dntotalcols += nlcols[i*2+0]; 5458 /* off diag */ 5459 ontotalcols += nlcols[i*2+1]; 5460 } 5461 /* We do not need to figure the right number of columns 5462 * since all the calculations will be done by going through the raw data 5463 * */ 5464 ierr = 
MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr); 5465 ierr = MatSetUp(*P_oth);CHKERRQ(ierr); 5466 ierr = PetscFree(pnnz);CHKERRQ(ierr); 5467 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5468 /* diag */ 5469 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr); 5470 /* off diag */ 5471 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr); 5472 /* diag */ 5473 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr); 5474 /* off diag */ 5475 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr); 5476 dntotalcols = 0; 5477 ontotalcols = 0; 5478 ntotalcols = 0; 5479 for (i=0;i<nrows;i++) { 5480 owner = 0; 5481 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr); 5482 /* Set iremote for diag matrix */ 5483 for (j=0;j<nlcols[i*2+0];j++) { 5484 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5485 iremote[dntotalcols].rank = owner; 5486 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5487 ilocal[dntotalcols++] = ntotalcols++; 5488 } 5489 /* off diag */ 5490 for (j=0;j<nlcols[i*2+1];j++) { 5491 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5492 oiremote[ontotalcols].rank = owner; 5493 oilocal[ontotalcols++] = ntotalcols++; 5494 } 5495 } 5496 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr); 5497 ierr = PetscFree(loffsets);CHKERRQ(ierr); 5498 ierr = PetscFree(nlcols);CHKERRQ(ierr); 5499 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5500 /* P serves as roots and P_oth is leaves 5501 * Diag matrix 5502 * */ 5503 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5504 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5505 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5506 5507 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr); 5508 /* Off diag */ 5509 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5510 ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr); 5511 ierr = PetscSFSetUp(osf);CHKERRQ(ierr); 5512 ierr = MatSeqAIJGetArrayRead(p->A,&pd_a);CHKERRQ(ierr); 5513 ierr = MatSeqAIJGetArrayRead(p->B,&po_a);CHKERRQ(ierr); 5514 /* We operate on the matrix internal data for saving memory */ 5515 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5516 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5517 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr); 5518 /* Convert to global indices for diag matrix */ 5519 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5520 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5521 /* We want P_oth store global indices */ 5522 ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr); 5523 /* Use memory scalable approach */ 5524 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr); 5525 ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); 5526 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5527 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5528 /* Convert back to local indices */ 5529 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5530 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5531 nout = 0; 5532 ierr = 
ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr); 5533 PetscCheckFalse(nout != po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ",po->i[plocalsize],nout); 5534 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 5535 /* Exchange values */ 5536 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5537 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5538 ierr = MatSeqAIJRestoreArrayRead(p->A,&pd_a);CHKERRQ(ierr); 5539 ierr = MatSeqAIJRestoreArrayRead(p->B,&po_a);CHKERRQ(ierr); 5540 /* Stop PETSc from shrinking memory */ 5541 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5542 ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5543 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5544 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5545 ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5546 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5547 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5548 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5549 PetscFunctionReturn(0); 5550 } 5551 5552 /* 5553 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5554 * This supports MPIAIJ and MAIJ 5555 * */ 5556 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5557 { 5558 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5559 Mat_SeqAIJ *p_oth; 5560 IS rows,map; 5561 PetscHMapI hamp; 5562 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5563 MPI_Comm comm; 5564 PetscSF sf,osf; 5565 PetscBool has; 5566 PetscErrorCode ierr; 5567 5568 PetscFunctionBegin; 5569 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5570 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5571 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5572 * and then create a submatrix (that often is an overlapping matrix) 5573 * */ 5574 if (reuse == MAT_INITIAL_MATRIX) { 5575 /* Use a hash table to figure out unique keys */ 5576 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5577 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5578 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5579 count = 0; 5580 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5581 for (i=0;i<a->B->cmap->n;i++) { 5582 key = a->garray[i]/dof; 5583 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5584 if (!has) { 5585 mapping[i] = count; 5586 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5587 } else { 5588 /* Current 'i' has the same value the previous step */ 5589 mapping[i] = count-1; 5590 } 5591 } 5592 ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5593 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5594 PetscCheckFalse(htsize!=count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count); 5595 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5596 off = 0; 5597 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5598 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5599 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5600 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5601 /* In case, 
the matrix was already created but users want to recreate the matrix */ 5602 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5603 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5604 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5605 ierr = ISDestroy(&map);CHKERRQ(ierr); 5606 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5607 } else if (reuse == MAT_REUSE_MATRIX) { 5608 /* If matrix was already created, we simply update values using SF objects 5609 * that as attached to the matrix ealier. 5610 */ 5611 const PetscScalar *pd_a,*po_a; 5612 5613 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5614 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5615 PetscCheckFalse(!sf || !osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet"); 5616 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5617 /* Update values in place */ 5618 ierr = MatSeqAIJGetArrayRead(p->A,&pd_a);CHKERRQ(ierr); 5619 ierr = MatSeqAIJGetArrayRead(p->B,&po_a);CHKERRQ(ierr); 5620 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5621 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5622 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5623 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5624 ierr = MatSeqAIJRestoreArrayRead(p->A,&pd_a);CHKERRQ(ierr); 5625 ierr = MatSeqAIJRestoreArrayRead(p->B,&po_a);CHKERRQ(ierr); 5626 } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type"); 5627 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5628 PetscFunctionReturn(0); 5629 } 5630 5631 /*@C 5632 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5633 5634 Collective on Mat 5635 5636 Input Parameters: 5637 + A - the first matrix in mpiaij format 5638 . B - the second matrix in mpiaij format 5639 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5640 5641 Output Parameters: 5642 + rowb - On input index sets of rows of B to extract (or NULL), modified on output 5643 . 
colb - On input index sets of columns of B to extract (or NULL), modified on output 5644 - B_seq - the sequential matrix generated 5645 5646 Level: developer 5647 5648 @*/ 5649 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5650 { 5651 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5652 PetscErrorCode ierr; 5653 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5654 IS isrowb,iscolb; 5655 Mat *bseq=NULL; 5656 5657 PetscFunctionBegin; 5658 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5659 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5660 } 5661 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5662 5663 if (scall == MAT_INITIAL_MATRIX) { 5664 start = A->cmap->rstart; 5665 cmap = a->garray; 5666 nzA = a->A->cmap->n; 5667 nzB = a->B->cmap->n; 5668 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5669 ncols = 0; 5670 for (i=0; i<nzB; i++) { /* row < local row index */ 5671 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5672 else break; 5673 } 5674 imark = i; 5675 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5676 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5677 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5678 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5679 } else { 5680 PetscCheckFalse(!rowb || !colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5681 isrowb = *rowb; iscolb = *colb; 5682 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5683 bseq[0] = *B_seq; 5684 } 5685 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5686 *B_seq = bseq[0]; 5687 ierr = PetscFree(bseq);CHKERRQ(ierr); 5688 if (!rowb) { 5689 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5690 } else { 5691 *rowb = isrowb; 5692 } 5693 if (!colb) { 5694 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5695 } else { 5696 *colb = iscolb; 5697 } 5698 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5699 PetscFunctionReturn(0); 5700 } 5701 5702 /* 5703 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5704 of the OFF-DIAGONAL portion of local A 5705 5706 Collective on Mat 5707 5708 Input Parameters: 5709 + A,B - the matrices in mpiaij format 5710 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5711 5712 Output Parameter: 5713 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5714 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5715 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5716 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5717 5718 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5719 for this matrix. This is not desirable.. 
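   A typical calling sequence (an illustrative sketch only; the variable names below are hypothetical,
   but the MAT_INITIAL_MATRIX/MAT_REUSE_MATRIX pattern matches how MatProductNumeric_MPIAIJBACKEND()
   reuses the returned buffers):

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa      = NULL;
     Mat        B_oth     = NULL;
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ...   change the numerical values of B   ...
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);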
5720 5721 Level: developer 5722 5723 */ 5724 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5725 { 5726 PetscErrorCode ierr; 5727 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5728 Mat_SeqAIJ *b_oth; 5729 VecScatter ctx; 5730 MPI_Comm comm; 5731 const PetscMPIInt *rprocs,*sprocs; 5732 const PetscInt *srow,*rstarts,*sstarts; 5733 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5734 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5735 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5736 MPI_Request *reqs = NULL,*rwaits = NULL,*swaits = NULL; 5737 PetscMPIInt size,tag,rank,nreqs; 5738 5739 PetscFunctionBegin; 5740 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5741 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 5742 5743 if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) { 5744 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5745 } 5746 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5747 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 5748 5749 if (size == 1) { 5750 startsj_s = NULL; 5751 bufa_ptr = NULL; 5752 *B_oth = NULL; 5753 PetscFunctionReturn(0); 5754 } 5755 5756 ctx = a->Mvctx; 5757 tag = ((PetscObject)ctx)->tag; 5758 5759 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5760 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5761 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5762 ierr = PetscMPIIntCast(nsends+nrecvs,&nreqs);CHKERRQ(ierr); 5763 ierr = PetscMalloc1(nreqs,&reqs);CHKERRQ(ierr); 5764 rwaits = reqs; 5765 swaits = reqs + nrecvs; 5766 5767 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5768 if (scall == MAT_INITIAL_MATRIX) { 5769 /* i-array */ 5770 /*---------*/ 5771 /* post receives */ 5772 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5773 for (i=0; i<nrecvs; i++) { 5774 rowlen = rvalues + rstarts[i]*rbs; 5775 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5776 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5777 } 5778 5779 /* pack the outgoing message */ 5780 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5781 5782 sstartsj[0] = 0; 5783 rstartsj[0] = 0; 5784 len = 0; /* total length of j or a array to be sent */ 5785 if (nsends) { 5786 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5787 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5788 } 5789 for (i=0; i<nsends; i++) { 5790 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5791 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5792 for (j=0; j<nrows; j++) { 5793 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5794 for (l=0; l<sbs; l++) { 5795 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5796 5797 rowlen[j*sbs+l] = ncols; 5798 5799 len 
+= ncols; 5800 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5801 } 5802 k++; 5803 } 5804 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5805 5806 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5807 } 5808 /* recvs and sends of i-array are completed */ 5809 if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5810 ierr = PetscFree(svalues);CHKERRQ(ierr); 5811 5812 /* allocate buffers for sending j and a arrays */ 5813 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5814 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5815 5816 /* create i-array of B_oth */ 5817 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5818 5819 b_othi[0] = 0; 5820 len = 0; /* total length of j or a array to be received */ 5821 k = 0; 5822 for (i=0; i<nrecvs; i++) { 5823 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5824 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5825 for (j=0; j<nrows; j++) { 5826 b_othi[k+1] = b_othi[k] + rowlen[j]; 5827 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5828 k++; 5829 } 5830 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5831 } 5832 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5833 5834 /* allocate space for j and a arrrays of B_oth */ 5835 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5836 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5837 5838 /* j-array */ 5839 /*---------*/ 5840 /* post receives of j-array */ 5841 for (i=0; i<nrecvs; i++) { 5842 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5843 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5844 } 5845 5846 /* pack the outgoing message j-array */ 5847 if (nsends) k = sstarts[0]; 5848 for (i=0; i<nsends; i++) { 5849 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5850 bufJ = bufj+sstartsj[i]; 5851 for (j=0; j<nrows; j++) { 5852 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5853 for (ll=0; ll<sbs; ll++) { 5854 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5855 for (l=0; l<ncols; l++) { 5856 *bufJ++ = cols[l]; 5857 } 5858 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5859 } 5860 } 5861 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5862 } 5863 5864 /* recvs and sends of j-array are completed */ 5865 if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5866 } else if (scall == MAT_REUSE_MATRIX) { 5867 sstartsj = *startsj_s; 5868 rstartsj = *startsj_r; 5869 bufa = *bufa_ptr; 5870 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5871 ierr = MatSeqAIJGetArrayWrite(*B_oth,&b_otha);CHKERRQ(ierr); 5872 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5873 5874 /* a-array */ 5875 /*---------*/ 5876 /* post receives of a-array */ 5877 for (i=0; i<nrecvs; i++) { 5878 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5879 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5880 } 5881 5882 /* pack the outgoing message a-array */ 5883 if (nsends) k = sstarts[0]; 5884 for (i=0; i<nsends; i++) { 5885 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5886 bufA = bufa+sstartsj[i]; 5887 for (j=0; j<nrows; j++) { 5888 row = srow[k++] + B->rmap->range[rank]; /* global row 
idx */ 5889 for (ll=0; ll<sbs; ll++) { 5890 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5891 for (l=0; l<ncols; l++) { 5892 *bufA++ = vals[l]; 5893 } 5894 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5895 } 5896 } 5897 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5898 } 5899 /* recvs and sends of a-array are completed */ 5900 if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5901 ierr = PetscFree(reqs);CHKERRQ(ierr); 5902 5903 if (scall == MAT_INITIAL_MATRIX) { 5904 /* put together the new matrix */ 5905 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5906 5907 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5908 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5909 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5910 b_oth->free_a = PETSC_TRUE; 5911 b_oth->free_ij = PETSC_TRUE; 5912 b_oth->nonew = 0; 5913 5914 ierr = PetscFree(bufj);CHKERRQ(ierr); 5915 if (!startsj_s || !bufa_ptr) { 5916 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5917 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5918 } else { 5919 *startsj_s = sstartsj; 5920 *startsj_r = rstartsj; 5921 *bufa_ptr = bufa; 5922 } 5923 } else if (scall == MAT_REUSE_MATRIX) { 5924 ierr = MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha);CHKERRQ(ierr); 5925 } 5926 5927 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5928 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5929 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5930 PetscFunctionReturn(0); 5931 } 5932 5933 /*@C 5934 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5935 5936 Not Collective 5937 5938 Input Parameter: 5939 . A - The matrix in mpiaij format 5940 5941 Output Parameters: 5942 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5943 . 
colmap - A map from global column index to local index into lvec 5944 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5945 5946 Level: developer 5947 5948 @*/ 5949 #if defined(PETSC_USE_CTABLE) 5950 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5951 #else 5952 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5953 #endif 5954 { 5955 Mat_MPIAIJ *a; 5956 5957 PetscFunctionBegin; 5958 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5959 PetscValidPointer(lvec, 2); 5960 PetscValidPointer(colmap, 3); 5961 PetscValidPointer(multScatter, 4); 5962 a = (Mat_MPIAIJ*) A->data; 5963 if (lvec) *lvec = a->lvec; 5964 if (colmap) *colmap = a->colmap; 5965 if (multScatter) *multScatter = a->Mvctx; 5966 PetscFunctionReturn(0); 5967 } 5968 5969 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5970 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5971 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5972 #if defined(PETSC_HAVE_MKL_SPARSE) 5973 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5974 #endif 5975 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5976 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5977 #if defined(PETSC_HAVE_ELEMENTAL) 5978 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5979 #endif 5980 #if defined(PETSC_HAVE_SCALAPACK) 5981 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 5982 #endif 5983 #if defined(PETSC_HAVE_HYPRE) 5984 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5985 #endif 5986 #if defined(PETSC_HAVE_CUDA) 5987 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 5988 #endif 5989 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 5990 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*); 5991 #endif 5992 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5993 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5994 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 5995 5996 /* 5997 Computes (B'*A')' since computing B*A directly is untenable 5998 5999 n p p 6000 [ ] [ ] [ ] 6001 m [ A ] * n [ B ] = m [ C ] 6002 [ ] [ ] [ ] 6003 6004 */ 6005 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 6006 { 6007 PetscErrorCode ierr; 6008 Mat At,Bt,Ct; 6009 6010 PetscFunctionBegin; 6011 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 6012 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 6013 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr); 6014 ierr = MatDestroy(&At);CHKERRQ(ierr); 6015 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 6016 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 6017 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 6018 PetscFunctionReturn(0); 6019 } 6020 6021 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 6022 { 6023 PetscErrorCode ierr; 6024 PetscBool cisdense; 6025 6026 PetscFunctionBegin; 6027 PetscCheckFalse(A->cmap->n != B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n); 6028 ierr = 
MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 6029 ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 6030 ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr); 6031 if (!cisdense) { 6032 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 6033 } 6034 ierr = MatSetUp(C);CHKERRQ(ierr); 6035 6036 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6037 PetscFunctionReturn(0); 6038 } 6039 6040 /* ----------------------------------------------------------------*/ 6041 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6042 { 6043 Mat_Product *product = C->product; 6044 Mat A = product->A,B=product->B; 6045 6046 PetscFunctionBegin; 6047 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 6048 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 6049 6050 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6051 C->ops->productsymbolic = MatProductSymbolic_AB; 6052 PetscFunctionReturn(0); 6053 } 6054 6055 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6056 { 6057 PetscErrorCode ierr; 6058 Mat_Product *product = C->product; 6059 6060 PetscFunctionBegin; 6061 if (product->type == MATPRODUCT_AB) { 6062 ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr); 6063 } 6064 PetscFunctionReturn(0); 6065 } 6066 6067 /* std::upper_bound(): Given a sorted array, return index of the first element in range [first,last) whose value 6068 is greater than value, or last if there is no such element. 6069 */ 6070 static inline PetscErrorCode PetscSortedIntUpperBound(PetscInt *array,PetscCount first,PetscCount last,PetscInt value,PetscCount *upper) 6071 { 6072 PetscCount it,step,count = last - first; 6073 6074 PetscFunctionBegin; 6075 while (count > 0) { 6076 it = first; 6077 step = count / 2; 6078 it += step; 6079 if (!(value < array[it])) { 6080 first = ++it; 6081 count -= step + 1; 6082 } else count = step; 6083 } 6084 *upper = first; 6085 PetscFunctionReturn(0); 6086 } 6087 6088 /* Merge two sets of sorted nonzero entries and return a CSR for the merged (sequential) matrix 6089 6090 Input Parameters: 6091 6092 j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1) 6093 j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2) 6094 6095 mat: both sets' entries are on m rows, where m is the number of local rows of the matrix mat 6096 6097 For Set1, j1[] contains column indices of the nonzeros. 6098 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6099 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 6100 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6101 6102 Similar for Set2. 6103 6104 This routine merges the two sets of nonzeros row by row and removes repeats. 6105 6106 Output Parameters: (memories are allocated by the caller) 6107 6108 i[],j[]: the CSR of the merged matrix, which has m rows. 6109 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix. 6110 imap2[]: similar to imap1[], but for Set2. 
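   A small single-row illustration of the merge (made-up numbers, not taken from an actual matrix):
     Set1: j1 = {1,3,3,7} (column 3 appears twice), so jmap1 = {0,1,3,4} and Set1 has 3 unique nonzeros
     Set2: j2 = {3,5},                              so jmap2 = {0,1,2}   and Set2 has 2 unique nonzeros
   The merged row is j = {1,3,5,7} with i = {0,4}, and the maps into the merged matrix are
     imap1 = {0,1,3}   (Set1's unique columns 1,3,7 land at merged positions 0,1,3)
     imap2 = {1,2}     (Set2's unique columns 3,5   land at merged positions 1,2)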
6111 Note we order nonzeros row-by-row and from left to right. 6112 */ 6113 static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[], 6114 const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[], 6115 PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[]) 6116 { 6117 PetscErrorCode ierr; 6118 PetscInt r,m; /* Row index of mat */ 6119 PetscCount t,t1,t2,b1,e1,b2,e2; 6120 6121 PetscFunctionBegin; 6122 ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr); 6123 t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged matrix, respectively */ 6124 i[0] = 0; 6125 for (r=0; r<m; r++) { /* Do row by row merging */ 6126 b1 = rowBegin1[r]; 6127 e1 = rowEnd1[r]; 6128 b2 = rowBegin2[r]; 6129 e2 = rowEnd2[r]; 6130 while (b1 < e1 && b2 < e2) { 6131 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6132 j[t] = j1[b1]; 6133 imap1[t1] = t; 6134 imap2[t2] = t; 6135 b1 += jmap1[t1+1] - jmap1[t1]; /* Jump to next unique local nonzero */ 6136 b2 += jmap2[t2+1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 6137 t1++; t2++; t++; 6138 } else if (j1[b1] < j2[b2]) { 6139 j[t] = j1[b1]; 6140 imap1[t1] = t; 6141 b1 += jmap1[t1+1] - jmap1[t1]; 6142 t1++; t++; 6143 } else { 6144 j[t] = j2[b2]; 6145 imap2[t2] = t; 6146 b2 += jmap2[t2+1] - jmap2[t2]; 6147 t2++; t++; 6148 } 6149 } 6150 /* Merge the remaining in either j1[] or j2[] */ 6151 while (b1 < e1) { 6152 j[t] = j1[b1]; 6153 imap1[t1] = t; 6154 b1 += jmap1[t1+1] - jmap1[t1]; 6155 t1++; t++; 6156 } 6157 while (b2 < e2) { 6158 j[t] = j2[b2]; 6159 imap2[t2] = t; 6160 b2 += jmap2[t2+1] - jmap2[t2]; 6161 t2++; t++; 6162 } 6163 i[r+1] = t; 6164 } 6165 PetscFunctionReturn(0); 6166 } 6167 6168 /* Split a set/group of local entries into two subsets: those in the diagonal block and those in the off-diagonal block 6169 6170 Input Parameters: 6171 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6172 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6173 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6174 6175 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6176 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6177 6178 Output Parameters: 6179 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6180 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6181 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6182 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6183 6184 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6185 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6186 repeats (i.e., same 'i,j' pair). 6187 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t] 6188 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6189 6190 Atot: number of entries belonging to the diagonal block 6191 Annz: number of unique nonzeros belonging to the diagonal block. 6192 6193 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6194 6195 Aperm[],Bperm[],Ajmap[],Bjmap[] are allocated by this routine with PetscMalloc4(). One has to free them with PetscFree4() in the exact order. 6196 */ 6197 static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[], 6198 PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[], 6199 PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_, 6200 PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_) 6201 { 6202 PetscErrorCode ierr; 6203 PetscInt cstart,cend,rstart,rend,row,col; 6204 PetscCount Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6205 PetscCount Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6206 PetscCount k,m,p,q,r,s,mid; 6207 PetscCount *Aperm,*Bperm,*Ajmap,*Bjmap; 6208 6209 PetscFunctionBegin; 6210 ierr = PetscLayoutGetRange(mat->rmap,&rstart,&rend);CHKERRQ(ierr); 6211 ierr = PetscLayoutGetRange(mat->cmap,&cstart,&cend);CHKERRQ(ierr); 6212 m = rend - rstart; 6213 6214 for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows */ 6215 6216 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6217 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6218 */ 6219 while (k<n) { 6220 row = i[k]; 6221 /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6222 for (s=k; s<n; s++) if (i[s] != row) break; 6223 for (p=k; p<s; p++) { 6224 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */ 6225 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]); 6226 } 6227 ierr = PetscSortIntWithCountArray(s-k,j+k,perm+k);CHKERRQ(ierr); 6228 ierr = PetscSortedIntUpperBound(j,k,s,-1,&mid);CHKERRQ(ierr); /* Seperate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6229 rowBegin[row-rstart] = k; 6230 rowMid[row-rstart] = mid; 6231 rowEnd[row-rstart] = s; 6232 6233 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6234 Atot += mid - k; 6235 Btot += s - mid; 6236 6237 /* Count unique nonzeros of this diag/offdiag row */ 6238 for (p=k; p<mid;) { 6239 col = j[p]; 6240 do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */ 6241 Annz++; 6242 } 6243 6244 for (p=mid; p<s;) { 6245 col = j[p]; 6246 do {p++;} while (p<s && j[p] == col); 6247 Bnnz++; 6248 } 6249 k = s; 6250 } 6251 6252 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6253 ierr = PetscMalloc4(Atot,&Aperm,Btot,&Bperm,Annz+1,&Ajmap,Bnnz+1,&Bjmap);CHKERRQ(ierr); 6254 6255 /* Re-scan indices and copy diag/offdiag permuation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6256 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6257 for (r=0; r<m; r++) { 6258 k = rowBegin[r]; 6259 mid = rowMid[r]; 6260 s = rowEnd[r]; 6261 ierr = PetscArraycpy(Aperm+Atot,perm+k, mid-k);CHKERRQ(ierr); 6262 ierr = PetscArraycpy(Bperm+Btot,perm+mid,s-mid);CHKERRQ(ierr); 6263 Atot += mid - k; 6264 Btot += s - mid; 6265 6266 /* Scan column indices in this row and find out how many repeats each 
unique nonzero has */ 6267 for (p=k; p<mid;) { 6268 col = j[p]; 6269 q = p; 6270 do {p++;} while (p<mid && j[p] == col); 6271 Ajmap[Annz+1] = Ajmap[Annz] + (p - q); 6272 Annz++; 6273 } 6274 6275 for (p=mid; p<s;) { 6276 col = j[p]; 6277 q = p; 6278 do {p++;} while (p<s && j[p] == col); 6279 Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q); 6280 Bnnz++; 6281 } 6282 } 6283 /* Output */ 6284 *Aperm_ = Aperm; 6285 *Annz_ = Annz; 6286 *Atot_ = Atot; 6287 *Ajmap_ = Ajmap; 6288 *Bperm_ = Bperm; 6289 *Bnnz_ = Bnnz; 6290 *Btot_ = Btot; 6291 *Bjmap_ = Bjmap; 6292 PetscFunctionReturn(0); 6293 } 6294 6295 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, const PetscInt coo_i[], const PetscInt coo_j[]) 6296 { 6297 PetscErrorCode ierr; 6298 MPI_Comm comm; 6299 PetscMPIInt rank,size; 6300 PetscInt m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6301 PetscCount k,p,q,rem; /* Loop variables over coo arrays */ 6302 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 6303 6304 PetscFunctionBegin; 6305 ierr = PetscFree(mpiaij->garray);CHKERRQ(ierr); 6306 ierr = VecDestroy(&mpiaij->lvec);CHKERRQ(ierr); 6307 #if defined(PETSC_USE_CTABLE) 6308 ierr = PetscTableDestroy(&mpiaij->colmap);CHKERRQ(ierr); 6309 #else 6310 ierr = PetscFree(mpiaij->colmap);CHKERRQ(ierr); 6311 #endif 6312 ierr = VecScatterDestroy(&mpiaij->Mvctx);CHKERRQ(ierr); 6313 mat->assembled = PETSC_FALSE; 6314 mat->was_assembled = PETSC_FALSE; 6315 ierr = MatResetPreallocationCOO_MPIAIJ(mat);CHKERRQ(ierr); 6316 6317 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 6318 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 6319 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 6320 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 6321 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 6322 ierr = PetscLayoutGetRange(mat->rmap,&rstart,&rend);CHKERRQ(ierr); 6323 ierr = PetscLayoutGetRange(mat->cmap,&cstart,&cend);CHKERRQ(ierr); 6324 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 6325 ierr = MatGetSize(mat,&M,&N);CHKERRQ(ierr); 6326 6327 /* ---------------------------------------------------------------------------*/ 6328 /* Sort (i,j) by row along with a permuation array, so that the to-be-ignored */ 6329 /* entries come first, then local rows, then remote rows. */ 6330 /* ---------------------------------------------------------------------------*/ 6331 PetscCount n1 = coo_n,*perm1; 6332 PetscInt *i1,*j1; /* Copies of input COOs along with a permutation array */ 6333 ierr = PetscMalloc3(n1,&i1,n1,&j1,n1,&perm1);CHKERRQ(ierr); 6334 ierr = PetscArraycpy(i1,coo_i,n1);CHKERRQ(ierr); /* Make a copy since we'll modify it */ 6335 ierr = PetscArraycpy(j1,coo_j,n1);CHKERRQ(ierr); 6336 for (k=0; k<n1; k++) perm1[k] = k; 6337 6338 /* Manipulate indices so that entries with negative row or col indices will have smallest 6339 row indices, local entries will have greater but negative row indices, and remote entries 6340 will have positive row indices. 
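     A concrete illustration (made-up values; say this rank owns rows [rstart,rend) = [10,20)):
       an entry with i = -1 or j = -1  (to be ignored)   ->  i becomes PETSC_MIN_INT
       an entry with i = 12            (a local row)     ->  i becomes 12 - PETSC_MAX_INT, negative but greater than PETSC_MIN_INT
       an entry with i = 35            (a remote row)    ->  i stays 35, positive
     (remote rows are instead dropped like negative ones when mpiaij->donotstash is set), so that after
     sorting by row the ignored entries come first, then the local rows, then the remote rows.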
6341 */ 6342 for (k=0; k<n1; k++) { 6343 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6344 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6345 else { PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6346 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ } 6347 } 6348 6349 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6350 ierr = PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1);CHKERRQ(ierr); 6351 for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */ 6352 ierr = PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem);CHKERRQ(ierr); /* rem is upper bound of the last local row */ 6353 for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows */ 6354 6355 /* ---------------------------------------------------------------------------*/ 6356 /* Split local rows into diag/offdiag portions */ 6357 /* ---------------------------------------------------------------------------*/ 6358 PetscCount *rowBegin1,*rowMid1,*rowEnd1; 6359 PetscCount *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1; 6360 PetscCount Annz1,Bnnz1,Atot1,Btot1; 6361 6362 ierr = PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1);CHKERRQ(ierr); 6363 ierr = PetscMalloc1(n1-rem,&Cperm1);CHKERRQ(ierr); 6364 ierr = MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1);CHKERRQ(ierr); 6365 6366 /* ---------------------------------------------------------------------------*/ 6367 /* Send remote rows to their owner */ 6368 /* ---------------------------------------------------------------------------*/ 6369 /* Find which rows should be sent to which remote ranks */ 6370 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6371 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6372 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6373 const PetscInt *ranges; 6374 PetscInt maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6375 6376 ierr = PetscLayoutGetRanges(mat->rmap,&ranges);CHKERRQ(ierr); 6377 ierr = PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries);CHKERRQ(ierr); 6378 for (k=rem; k<n1;) { 6379 PetscMPIInt owner; 6380 PetscInt firstRow,lastRow; 6381 6382 /* Locate a row range */ 6383 firstRow = i1[k]; /* first row of this owner */ 6384 ierr = PetscLayoutFindOwner(mat->rmap,firstRow,&owner);CHKERRQ(ierr); 6385 lastRow = ranges[owner+1]-1; /* last row of this owner */ 6386 6387 /* Find the first index 'p' in [k,n1) with i1[p] belonging to the next owner */ 6388 ierr = PetscSortedIntUpperBound(i1,k,n1,lastRow,&p);CHKERRQ(ierr); 6389 6390 /* All entries in [k,p) belong to this remote owner */ 6391 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6392 PetscMPIInt *sendto2; 6393 PetscInt *nentries2; 6394 PetscInt maxNsend2 = (maxNsend <= size/2) ?
maxNsend*2 : size; 6395 6396 ierr = PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2);CHKERRQ(ierr); 6397 ierr = PetscArraycpy(sendto2,sendto,maxNsend);CHKERRQ(ierr); 6398 ierr = PetscArraycpy(nentries2,nentries,maxNsend);CHKERRQ(ierr); 6399 ierr = PetscFree2(sendto,nentries);CHKERRQ(ierr); 6400 sendto = sendto2; 6401 nentries = nentries2; 6402 maxNsend = maxNsend2; 6403 } 6404 sendto[nsend] = owner; 6405 nentries[nsend] = p - k; 6406 ierr = PetscCountCast(p-k,&nentries[nsend]);CHKERRQ(ierr); 6407 nsend++; 6408 k = p; 6409 } 6410 6411 /* Build 1st SF to know offsets on remote to send data */ 6412 PetscSF sf1; 6413 PetscInt nroots = 1,nroots2 = 0; 6414 PetscInt nleaves = nsend,nleaves2 = 0; 6415 PetscInt *offsets; 6416 PetscSFNode *iremote; 6417 6418 ierr = PetscSFCreate(comm,&sf1);CHKERRQ(ierr); 6419 ierr = PetscMalloc1(nsend,&iremote);CHKERRQ(ierr); 6420 ierr = PetscMalloc1(nsend,&offsets);CHKERRQ(ierr); 6421 for (k=0; k<nsend; k++) { 6422 iremote[k].rank = sendto[k]; 6423 iremote[k].index = 0; 6424 nleaves2 += nentries[k]; 6425 PetscCheck(nleaves2 >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt"); 6426 } 6427 ierr = PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 6428 ierr = PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM);CHKERRQ(ierr); 6429 ierr = PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM);CHKERRQ(ierr); /* If nroots2 overflowed, the offsets[] check below catches it */ 6430 ierr = PetscSFDestroy(&sf1);CHKERRQ(ierr); 6431 PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 " PetscInt_FMT " != number of remote entries " PetscCount_FMT "",nleaves2,n1-rem); 6432 6433 /* Build 2nd SF to send remote COOs to their owner */ 6434 PetscSF sf2; 6435 nroots = nroots2; 6436 nleaves = nleaves2; 6437 ierr = PetscSFCreate(comm,&sf2);CHKERRQ(ierr); 6438 ierr = PetscSFSetFromOptions(sf2);CHKERRQ(ierr); 6439 ierr = PetscMalloc1(nleaves,&iremote);CHKERRQ(ierr); 6440 p = 0; 6441 for (k=0; k<nsend; k++) { 6442 PetscCheck(offsets[k] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt"); 6443 for (q=0; q<nentries[k]; q++,p++) { 6444 iremote[p].rank = sendto[k]; 6445 iremote[p].index = offsets[k] + q; 6446 } 6447 } 6448 ierr = PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 6449 6450 /* sf2 only sends contiguous leafdata to contiguous rootdata.
We record the permuation which will be used to fill leafdata */ 6451 ierr = PetscArraycpy(Cperm1,perm1+rem,n1-rem);CHKERRQ(ierr); 6452 6453 /* Send the remote COOs to their owner */ 6454 PetscInt n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6455 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6456 ierr = PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2);CHKERRQ(ierr); 6457 ierr = PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE);CHKERRQ(ierr); 6458 ierr = PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE);CHKERRQ(ierr); 6459 ierr = PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE);CHKERRQ(ierr); 6460 ierr = PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE);CHKERRQ(ierr); 6461 6462 ierr = PetscFree(offsets);CHKERRQ(ierr); 6463 ierr = PetscFree2(sendto,nentries);CHKERRQ(ierr); 6464 6465 /* ---------------------------------------------------------------*/ 6466 /* Sort received COOs by row along with the permutation array */ 6467 /* ---------------------------------------------------------------*/ 6468 for (k=0; k<n2; k++) perm2[k] = k; 6469 ierr = PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2);CHKERRQ(ierr); 6470 6471 /* ---------------------------------------------------------------*/ 6472 /* Split received COOs into diag/offdiag portions */ 6473 /* ---------------------------------------------------------------*/ 6474 PetscCount *rowBegin2,*rowMid2,*rowEnd2; 6475 PetscCount *Ajmap2,*Aperm2,*Bjmap2,*Bperm2; 6476 PetscCount Annz2,Bnnz2,Atot2,Btot2; 6477 6478 ierr = PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2);CHKERRQ(ierr); 6479 ierr = MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2);CHKERRQ(ierr); 6480 6481 /* --------------------------------------------------------------------------*/ 6482 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6483 /* --------------------------------------------------------------------------*/ 6484 PetscInt *Ai,*Bi; 6485 PetscInt *Aj,*Bj; 6486 6487 ierr = PetscMalloc1(m+1,&Ai);CHKERRQ(ierr); 6488 ierr = PetscMalloc1(m+1,&Bi);CHKERRQ(ierr); 6489 ierr = PetscMalloc1(Annz1+Annz2,&Aj);CHKERRQ(ierr); /* Since local and remote entries might have dups, we might allocate excess memory */ 6490 ierr = PetscMalloc1(Bnnz1+Bnnz2,&Bj);CHKERRQ(ierr); 6491 6492 PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2; 6493 ierr = PetscMalloc4(Annz1,&Aimap1,Bnnz1,&Bimap1,Annz2,&Aimap2,Bnnz2,&Bimap2);CHKERRQ(ierr); 6494 6495 ierr = MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj);CHKERRQ(ierr); 6496 ierr = MatMergeEntries_Internal(mat,j1,j2,rowMid1, rowEnd1,rowMid2, rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj);CHKERRQ(ierr); 6497 ierr = PetscFree3(rowBegin1,rowMid1,rowEnd1);CHKERRQ(ierr); 6498 ierr = PetscFree3(rowBegin2,rowMid2,rowEnd2);CHKERRQ(ierr); 6499 ierr = PetscFree3(i1,j1,perm1);CHKERRQ(ierr); 6500 ierr = PetscFree3(i2,j2,perm2);CHKERRQ(ierr); 6501 6502 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6503 PetscInt Annz = Ai[m]; 6504 PetscInt Bnnz = Bi[m]; 6505 if (Annz < Annz1 + Annz2) { 6506 PetscInt *Aj_new; 6507 ierr = PetscMalloc1(Annz,&Aj_new);CHKERRQ(ierr); 6508 ierr = PetscArraycpy(Aj_new,Aj,Annz);CHKERRQ(ierr); 6509 ierr = 
PetscFree(Aj);CHKERRQ(ierr); 6510 Aj = Aj_new; 6511 } 6512 6513 if (Bnnz < Bnnz1 + Bnnz2) { 6514 PetscInt *Bj_new; 6515 ierr = PetscMalloc1(Bnnz,&Bj_new);CHKERRQ(ierr); 6516 ierr = PetscArraycpy(Bj_new,Bj,Bnnz);CHKERRQ(ierr); 6517 ierr = PetscFree(Bj);CHKERRQ(ierr); 6518 Bj = Bj_new; 6519 } 6520 6521 /* --------------------------------------------------------------------------------*/ 6522 /* Create new submatrices for on-process and off-process coupling */ 6523 /* --------------------------------------------------------------------------------*/ 6524 PetscScalar *Aa,*Ba; 6525 MatType rtype; 6526 Mat_SeqAIJ *a,*b; 6527 ierr = PetscCalloc1(Annz,&Aa);CHKERRQ(ierr); /* Zero matrix on device */ 6528 ierr = PetscCalloc1(Bnnz,&Ba);CHKERRQ(ierr); 6529 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6530 if (cstart) {for (k=0; k<Annz; k++) Aj[k] -= cstart;} 6531 ierr = MatDestroy(&mpiaij->A);CHKERRQ(ierr); 6532 ierr = MatDestroy(&mpiaij->B);CHKERRQ(ierr); 6533 ierr = MatGetRootType_Private(mat,&rtype);CHKERRQ(ierr); 6534 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,Ai,Aj,Aa,&mpiaij->A);CHKERRQ(ierr); 6535 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,mat->cmap->N,Bi,Bj,Ba,&mpiaij->B);CHKERRQ(ierr); 6536 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 6537 6538 a = (Mat_SeqAIJ*)mpiaij->A->data; 6539 b = (Mat_SeqAIJ*)mpiaij->B->data; 6540 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6541 a->free_a = b->free_a = PETSC_TRUE; 6542 a->free_ij = b->free_ij = PETSC_TRUE; 6543 6544 /* conversion must happen AFTER multiply setup */ 6545 ierr = MatConvert(mpiaij->A,rtype,MAT_INPLACE_MATRIX,&mpiaij->A);CHKERRQ(ierr); 6546 ierr = MatConvert(mpiaij->B,rtype,MAT_INPLACE_MATRIX,&mpiaij->B);CHKERRQ(ierr); 6547 ierr = VecDestroy(&mpiaij->lvec);CHKERRQ(ierr); 6548 ierr = MatCreateVecs(mpiaij->B,&mpiaij->lvec,NULL);CHKERRQ(ierr); 6549 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->lvec);CHKERRQ(ierr); 6550 6551 mpiaij->coo_n = coo_n; 6552 mpiaij->coo_sf = sf2; 6553 mpiaij->sendlen = nleaves; 6554 mpiaij->recvlen = nroots; 6555 6556 mpiaij->Annz1 = Annz1; 6557 mpiaij->Annz2 = Annz2; 6558 mpiaij->Bnnz1 = Bnnz1; 6559 mpiaij->Bnnz2 = Bnnz2; 6560 6561 mpiaij->Atot1 = Atot1; 6562 mpiaij->Atot2 = Atot2; 6563 mpiaij->Btot1 = Btot1; 6564 mpiaij->Btot2 = Btot2; 6565 6566 mpiaij->Aimap1 = Aimap1; 6567 mpiaij->Aimap2 = Aimap2; 6568 mpiaij->Bimap1 = Bimap1; 6569 mpiaij->Bimap2 = Bimap2; 6570 6571 mpiaij->Ajmap1 = Ajmap1; 6572 mpiaij->Ajmap2 = Ajmap2; 6573 mpiaij->Bjmap1 = Bjmap1; 6574 mpiaij->Bjmap2 = Bjmap2; 6575 6576 mpiaij->Aperm1 = Aperm1; 6577 mpiaij->Aperm2 = Aperm2; 6578 mpiaij->Bperm1 = Bperm1; 6579 mpiaij->Bperm2 = Bperm2; 6580 6581 mpiaij->Cperm1 = Cperm1; 6582 6583 /* Allocate in preallocation. 
If not used, it has zero cost on host */ 6584 ierr = PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf);CHKERRQ(ierr); 6585 PetscFunctionReturn(0); 6586 } 6587 6588 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode) 6589 { 6590 PetscErrorCode ierr; 6591 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 6592 Mat A = mpiaij->A,B = mpiaij->B; 6593 PetscCount Annz1 = mpiaij->Annz1,Annz2 = mpiaij->Annz2,Bnnz1 = mpiaij->Bnnz1,Bnnz2 = mpiaij->Bnnz2; 6594 PetscScalar *Aa,*Ba; 6595 PetscScalar *sendbuf = mpiaij->sendbuf; 6596 PetscScalar *recvbuf = mpiaij->recvbuf; 6597 const PetscCount *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap1 = mpiaij->Aimap1,*Aimap2 = mpiaij->Aimap2; 6598 const PetscCount *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap1 = mpiaij->Bimap1,*Bimap2 = mpiaij->Bimap2; 6599 const PetscCount *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2; 6600 const PetscCount *Cperm1 = mpiaij->Cperm1; 6601 6602 PetscFunctionBegin; 6603 ierr = MatSeqAIJGetArray(A,&Aa);CHKERRQ(ierr); /* Might read and write matrix values */ 6604 ierr = MatSeqAIJGetArray(B,&Ba);CHKERRQ(ierr); 6605 if (imode == INSERT_VALUES) { 6606 ierr = PetscMemzero(Aa,((Mat_SeqAIJ*)A->data)->nz*sizeof(PetscScalar));CHKERRQ(ierr); 6607 ierr = PetscMemzero(Ba,((Mat_SeqAIJ*)B->data)->nz*sizeof(PetscScalar));CHKERRQ(ierr); 6608 } 6609 6610 /* Pack entries to be sent to remote */ 6611 for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6612 6613 /* Send remote entries to their owner and overlap the communication with local computation */ 6614 ierr = PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE);CHKERRQ(ierr); 6615 /* Add local entries to A and B */ 6616 for (PetscCount i=0; i<Annz1; i++) { 6617 for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) Aa[Aimap1[i]] += v[Aperm1[k]]; 6618 } 6619 for (PetscCount i=0; i<Bnnz1; i++) { 6620 for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) Ba[Bimap1[i]] += v[Bperm1[k]]; 6621 } 6622 ierr = PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE);CHKERRQ(ierr); 6623 6624 /* Add received remote entries to A and B */ 6625 for (PetscCount i=0; i<Annz2; i++) { 6626 for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6627 } 6628 for (PetscCount i=0; i<Bnnz2; i++) { 6629 for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6630 } 6631 ierr = MatSeqAIJRestoreArray(A,&Aa);CHKERRQ(ierr); 6632 ierr = MatSeqAIJRestoreArray(B,&Ba);CHKERRQ(ierr); 6633 PetscFunctionReturn(0); 6634 } 6635 6636 /* ----------------------------------------------------------------*/ 6637 6638 /*MC 6639 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6640 6641 Options Database Keys: 6642 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 6643 6644 Level: beginner 6645 6646 Notes: 6647 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values, 6648 in this case the values associated with the rows and columns one passes in are set to zero 6649 in the matrix 6650 6651 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. 
In this no 6652 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 6653 6654 .seealso: MatCreateAIJ() 6655 M*/ 6656 6657 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6658 { 6659 Mat_MPIAIJ *b; 6660 PetscErrorCode ierr; 6661 PetscMPIInt size; 6662 6663 PetscFunctionBegin; 6664 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 6665 6666 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 6667 B->data = (void*)b; 6668 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 6669 B->assembled = PETSC_FALSE; 6670 B->insertmode = NOT_SET_VALUES; 6671 b->size = size; 6672 6673 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr); 6674 6675 /* build cache for off array entries formed */ 6676 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 6677 6678 b->donotstash = PETSC_FALSE; 6679 b->colmap = NULL; 6680 b->garray = NULL; 6681 b->roworiented = PETSC_TRUE; 6682 6683 /* stuff used for matrix vector multiply */ 6684 b->lvec = NULL; 6685 b->Mvctx = NULL; 6686 6687 /* stuff for MatGetRow() */ 6688 b->rowindices = NULL; 6689 b->rowvalues = NULL; 6690 b->getrowactive = PETSC_FALSE; 6691 6692 /* flexible pointer used in CUSPARSE classes */ 6693 b->spptr = NULL; 6694 6695 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 6696 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 6697 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 6698 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 6699 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 6700 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 6701 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 6702 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 6703 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 6704 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr); 6705 #if defined(PETSC_HAVE_CUDA) 6706 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr); 6707 #endif 6708 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6709 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr); 6710 #endif 6711 #if defined(PETSC_HAVE_MKL_SPARSE) 6712 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 6713 #endif 6714 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 6715 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr); 6716 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 6717 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);CHKERRQ(ierr); 6718 #if defined(PETSC_HAVE_ELEMENTAL) 6719 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 6720 #endif 6721 #if defined(PETSC_HAVE_SCALAPACK) 6722 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr); 6723 #endif 6724 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 6725 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 6726 #if defined(PETSC_HAVE_HYPRE) 6727 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 6728 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr); 6729 #endif 6730 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr); 6731 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr); 6732 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ);CHKERRQ(ierr); 6733 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ);CHKERRQ(ierr); 6734 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 6735 PetscFunctionReturn(0); 6736 } 6737 6738 /*@C 6739 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 6740 and "off-diagonal" part of the matrix in CSR format. 6741 6742 Collective 6743 6744 Input Parameters: 6745 + comm - MPI communicator 6746 . m - number of local rows (Cannot be PETSC_DECIDE) 6747 . n - This value should be the same as the local size used in creating the 6748 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 6749 calculated if N is given) For square matrices n is almost always m. 6750 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 6751 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 6752 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6753 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6754 . a - matrix values 6755 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6756 . oj - column indices, which must be global, representing global columns in the MPIAIJ matrix 6757 - oa - matrix values 6758 6759 Output Parameter: 6760 . mat - the matrix 6761 6762 Level: advanced 6763 6764 Notes: 6765 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6766 must free the arrays once the matrix has been destroyed and not before. 
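   As an illustration only (made-up numbers, not from a PETSc example or test), a rank that owns 2 rows and global
   columns 0-1 of a 2x4 matrix, with one diagonal-block entry and one off-diagonal-block entry per row, could pass
     i[]  = {0,1,2},  j[]  = {0,1},  a[]  = {1.0,2.0}    (diagonal part, local column indices)
     oi[] = {0,1,2},  oj[] = {2,3},  oa[] = {3.0,4.0}    (off-diagonal part, global column indices)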
6767 6768 The i and j indices are 0 based 6769 6770 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6771 6772 This sets local rows and cannot be used to set off-processor values. 6773 6774 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6775 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6776 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6777 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6778 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6779 communication if it is known that only local entries will be set. 6780 6781 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 6782 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 6783 @*/ 6784 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 6785 { 6786 PetscErrorCode ierr; 6787 Mat_MPIAIJ *maij; 6788 6789 PetscFunctionBegin; 6790 PetscCheckFalse(m < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6791 PetscCheckFalse(i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 6792 PetscCheckFalse(oi[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 6793 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 6794 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 6795 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 6796 maij = (Mat_MPIAIJ*) (*mat)->data; 6797 6798 (*mat)->preallocated = PETSC_TRUE; 6799 6800 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 6801 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 6802 6803 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 6804 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 6805 6806 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 6807 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6808 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6809 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 6810 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6811 PetscFunctionReturn(0); 6812 } 6813 6814 typedef struct { 6815 Mat *mp; /* intermediate products */ 6816 PetscBool *mptmp; /* is the intermediate product temporary ? */ 6817 PetscInt cp; /* number of intermediate products */ 6818 6819 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6820 PetscInt *startsj_s,*startsj_r; 6821 PetscScalar *bufa; 6822 Mat P_oth; 6823 6824 /* may take advantage of merging product->B */ 6825 Mat Bloc; /* B-local by merging diag and off-diag */ 6826 6827 /* cusparse does not have support to split between symbolic and numeric phases. 
6828 When api_user is true, we don't need to update the numerical values 6829 of the temporary storage */ 6830 PetscBool reusesym; 6831 6832 /* support for COO values insertion */ 6833 PetscScalar *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6834 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6835 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6836 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */ 6837 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6838 PetscMemType mtype; 6839 6840 /* customization */ 6841 PetscBool abmerge; 6842 PetscBool P_oth_bind; 6843 } MatMatMPIAIJBACKEND; 6844 6845 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6846 { 6847 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data; 6848 PetscInt i; 6849 PetscErrorCode ierr; 6850 6851 PetscFunctionBegin; 6852 ierr = PetscFree2(mmdata->startsj_s,mmdata->startsj_r);CHKERRQ(ierr); 6853 ierr = PetscFree(mmdata->bufa);CHKERRQ(ierr); 6854 ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v);CHKERRQ(ierr); 6855 ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w);CHKERRQ(ierr); 6856 ierr = MatDestroy(&mmdata->P_oth);CHKERRQ(ierr); 6857 ierr = MatDestroy(&mmdata->Bloc);CHKERRQ(ierr); 6858 ierr = PetscSFDestroy(&mmdata->sf);CHKERRQ(ierr); 6859 for (i = 0; i < mmdata->cp; i++) { 6860 ierr = MatDestroy(&mmdata->mp[i]);CHKERRQ(ierr); 6861 } 6862 ierr = PetscFree2(mmdata->mp,mmdata->mptmp);CHKERRQ(ierr); 6863 ierr = PetscFree(mmdata->own[0]);CHKERRQ(ierr); 6864 ierr = PetscFree(mmdata->own);CHKERRQ(ierr); 6865 ierr = PetscFree(mmdata->off[0]);CHKERRQ(ierr); 6866 ierr = PetscFree(mmdata->off);CHKERRQ(ierr); 6867 ierr = PetscFree(mmdata);CHKERRQ(ierr); 6868 PetscFunctionReturn(0); 6869 } 6870 6871 /* Copy selected n entries with indices in idx[] of A to v[]. 
6872 If idx is NULL, copy the whole data array of A to v[] 6873 */ 6874 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 6875 { 6876 PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]); 6877 PetscErrorCode ierr; 6878 6879 PetscFunctionBegin; 6880 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f);CHKERRQ(ierr); 6881 if (f) { 6882 ierr = (*f)(A,n,idx,v);CHKERRQ(ierr); 6883 } else { 6884 const PetscScalar *vv; 6885 6886 ierr = MatSeqAIJGetArrayRead(A,&vv);CHKERRQ(ierr); 6887 if (n && idx) { 6888 PetscScalar *w = v; 6889 const PetscInt *oi = idx; 6890 PetscInt j; 6891 6892 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 6893 } else { 6894 ierr = PetscArraycpy(v,vv,n);CHKERRQ(ierr); 6895 } 6896 ierr = MatSeqAIJRestoreArrayRead(A,&vv);CHKERRQ(ierr); 6897 } 6898 PetscFunctionReturn(0); 6899 } 6900 6901 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 6902 { 6903 MatMatMPIAIJBACKEND *mmdata; 6904 PetscInt i,n_d,n_o; 6905 PetscErrorCode ierr; 6906 6907 PetscFunctionBegin; 6908 MatCheckProduct(C,1); 6909 PetscCheckFalse(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 6910 mmdata = (MatMatMPIAIJBACKEND*)C->product->data; 6911 if (!mmdata->reusesym) { /* update temporary matrices */ 6912 if (mmdata->P_oth) { 6913 ierr = MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 6914 } 6915 if (mmdata->Bloc) { 6916 ierr = MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc);CHKERRQ(ierr); 6917 } 6918 } 6919 mmdata->reusesym = PETSC_FALSE; 6920 6921 for (i = 0; i < mmdata->cp; i++) { 6922 PetscCheckFalse(!mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]); 6923 ierr = (*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]);CHKERRQ(ierr); 6924 } 6925 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 6926 PetscInt noff = mmdata->off[i+1] - mmdata->off[i]; 6927 6928 if (mmdata->mptmp[i]) continue; 6929 if (noff) { 6930 PetscInt nown = mmdata->own[i+1] - mmdata->own[i]; 6931 6932 ierr = MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o);CHKERRQ(ierr); 6933 ierr = MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d);CHKERRQ(ierr); 6934 n_o += noff; 6935 n_d += nown; 6936 } else { 6937 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data; 6938 6939 ierr = MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d);CHKERRQ(ierr); 6940 n_d += mm->nz; 6941 } 6942 } 6943 if (mmdata->hasoffproc) { /* offprocess insertion */ 6944 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr); 6945 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr); 6946 } 6947 ierr = MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES);CHKERRQ(ierr); 6948 PetscFunctionReturn(0); 6949 } 6950 6951 /* Support for Pt * A, A * P, or Pt * A * P */ 6952 #define MAX_NUMBER_INTERMEDIATE 4 6953 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 6954 { 6955 Mat_Product *product = C->product; 6956 Mat A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 6957 Mat_MPIAIJ *a,*p; 6958 MatMatMPIAIJBACKEND *mmdata; 6959 ISLocalToGlobalMapping P_oth_l2g = NULL; 6960 IS glob = NULL; 6961 const char *prefix; 6962 char 
pprefix[256]; 6963 const PetscInt *globidx,*P_oth_idx; 6964 PetscInt i,j,cp,m,n,M,N,*coo_i,*coo_j; 6965 PetscCount ncoo,ncoo_d,ncoo_o,ncoo_oown; 6966 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */ 6967 /* type-0: consecutive, start from 0; type-1: consecutive with */ 6968 /* a base offset; type-2: sparse with a local to global map table */ 6969 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 6970 6971 MatProductType ptype; 6972 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk; 6973 PetscMPIInt size; 6974 PetscErrorCode ierr; 6975 6976 PetscFunctionBegin; 6977 MatCheckProduct(C,1); 6978 PetscCheckFalse(product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 6979 ptype = product->type; 6980 if (product->A->symmetric && ptype == MATPRODUCT_AtB) { 6981 ptype = MATPRODUCT_AB; 6982 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 6983 } 6984 switch (ptype) { 6985 case MATPRODUCT_AB: 6986 A = product->A; 6987 P = product->B; 6988 m = A->rmap->n; 6989 n = P->cmap->n; 6990 M = A->rmap->N; 6991 N = P->cmap->N; 6992 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 6993 break; 6994 case MATPRODUCT_AtB: 6995 P = product->A; 6996 A = product->B; 6997 m = P->cmap->n; 6998 n = A->cmap->n; 6999 M = P->cmap->N; 7000 N = A->cmap->N; 7001 hasoffproc = PETSC_TRUE; 7002 break; 7003 case MATPRODUCT_PtAP: 7004 A = product->A; 7005 P = product->B; 7006 m = P->cmap->n; 7007 n = P->cmap->n; 7008 M = P->cmap->N; 7009 N = P->cmap->N; 7010 hasoffproc = PETSC_TRUE; 7011 break; 7012 default: 7013 SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 7014 } 7015 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)C),&size);CHKERRMPI(ierr); 7016 if (size == 1) hasoffproc = PETSC_FALSE; 7017 7018 /* defaults */ 7019 for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) { 7020 mp[i] = NULL; 7021 mptmp[i] = PETSC_FALSE; 7022 rmapt[i] = -1; 7023 cmapt[i] = -1; 7024 rmapa[i] = NULL; 7025 cmapa[i] = NULL; 7026 } 7027 7028 /* customization */ 7029 ierr = PetscNew(&mmdata);CHKERRQ(ierr); 7030 mmdata->reusesym = product->api_user; 7031 if (ptype == MATPRODUCT_AB) { 7032 if (product->api_user) { 7033 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr); 7034 ierr = PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr); 7035 ierr = PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 7036 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7037 } else { 7038 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr); 7039 ierr = PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr); 7040 ierr = PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 7041 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7042 } 7043 } else if (ptype == MATPRODUCT_PtAP) { 7044 if (product->api_user) { 7045 ierr = 
PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr); 7046 ierr = PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 7047 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7048 } else { 7049 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr); 7050 ierr = PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 7051 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7052 } 7053 } 7054 a = (Mat_MPIAIJ*)A->data; 7055 p = (Mat_MPIAIJ*)P->data; 7056 ierr = MatSetSizes(C,m,n,M,N);CHKERRQ(ierr); 7057 ierr = PetscLayoutSetUp(C->rmap);CHKERRQ(ierr); 7058 ierr = PetscLayoutSetUp(C->cmap);CHKERRQ(ierr); 7059 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 7060 ierr = MatGetOptionsPrefix(C,&prefix);CHKERRQ(ierr); 7061 7062 cp = 0; 7063 switch (ptype) { 7064 case MATPRODUCT_AB: /* A * P */ 7065 ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 7066 7067 /* A_diag * P_local (merged or not) */ 7068 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7069 /* P is product->B */ 7070 ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 7071 ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 7072 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 7073 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 7074 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr); 7075 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 7076 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 7077 mp[cp]->product->api_user = product->api_user; 7078 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 7079 PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7080 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 7081 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 7082 rmapt[cp] = 1; 7083 cmapt[cp] = 2; 7084 cmapa[cp] = globidx; 7085 mptmp[cp] = PETSC_FALSE; 7086 cp++; 7087 } else { /* A_diag * P_diag and A_diag * P_off */ 7088 ierr = MatProductCreate(a->A,p->A,NULL,&mp[cp]);CHKERRQ(ierr); 7089 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 7090 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 7091 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr); 7092 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 7093 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 7094 mp[cp]->product->api_user = product->api_user; 7095 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 7096 PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7097 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 7098 rmapt[cp] = 1; 7099 cmapt[cp] = 1; 7100 mptmp[cp] = PETSC_FALSE; 7101 cp++; 7102 ierr = MatProductCreate(a->A,p->B,NULL,&mp[cp]);CHKERRQ(ierr); 7103 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 7104 ierr = 
MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 7105 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr); 7106 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 7107 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 7108 mp[cp]->product->api_user = product->api_user; 7109 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 7110 PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7111 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 7112 rmapt[cp] = 1; 7113 cmapt[cp] = 2; 7114 cmapa[cp] = p->garray; 7115 mptmp[cp] = PETSC_FALSE; 7116 cp++; 7117 } 7118 7119 /* A_off * P_other */ 7120 if (mmdata->P_oth) { 7121 ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); /* make P_oth use local col ids */ 7122 ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 7123 ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr); 7124 ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr); 7125 ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr); 7126 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 7127 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 7128 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr); 7129 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 7130 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 7131 mp[cp]->product->api_user = product->api_user; 7132 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 7133 PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7134 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 7135 rmapt[cp] = 1; 7136 cmapt[cp] = 2; 7137 cmapa[cp] = P_oth_idx; 7138 mptmp[cp] = PETSC_FALSE; 7139 cp++; 7140 } 7141 break; 7142 7143 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7144 /* A is product->B */ 7145 ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 7146 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7147 ierr = MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 7148 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 7149 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 7150 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr); 7151 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 7152 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 7153 mp[cp]->product->api_user = product->api_user; 7154 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 7155 PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7156 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 7157 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 7158 rmapt[cp] = 2; 7159 rmapa[cp] = globidx; 7160 cmapt[cp] = 2; 7161 cmapa[cp] = globidx; 7162 mptmp[cp] = PETSC_FALSE; 7163 cp++; 7164 } else { 7165 ierr = MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 7166 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 7167 ierr = 
MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 7168 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr); 7169 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 7170 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 7171 mp[cp]->product->api_user = product->api_user; 7172 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 7173 PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7174 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 7175 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 7176 rmapt[cp] = 1; 7177 cmapt[cp] = 2; 7178 cmapa[cp] = globidx; 7179 mptmp[cp] = PETSC_FALSE; 7180 cp++; 7181 ierr = MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 7182 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 7183 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 7184 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr); 7185 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 7186 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 7187 mp[cp]->product->api_user = product->api_user; 7188 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 7189 PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7190 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 7191 rmapt[cp] = 2; 7192 rmapa[cp] = p->garray; 7193 cmapt[cp] = 2; 7194 cmapa[cp] = globidx; 7195 mptmp[cp] = PETSC_FALSE; 7196 cp++; 7197 } 7198 break; 7199 case MATPRODUCT_PtAP: 7200 ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 7201 /* P is product->B */ 7202 ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 7203 ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 7204 ierr = MatProductSetType(mp[cp],MATPRODUCT_PtAP);CHKERRQ(ierr); 7205 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 7206 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr); 7207 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 7208 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 7209 mp[cp]->product->api_user = product->api_user; 7210 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 7211 PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7212 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 7213 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 7214 rmapt[cp] = 2; 7215 rmapa[cp] = globidx; 7216 cmapt[cp] = 2; 7217 cmapa[cp] = globidx; 7218 mptmp[cp] = PETSC_FALSE; 7219 cp++; 7220 if (mmdata->P_oth) { 7221 ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); 7222 ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 7223 ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr); 7224 ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr); 7225 ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr); 7226 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 7227 ierr = 
MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 7228 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr); 7229 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 7230 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 7231 mp[cp]->product->api_user = product->api_user; 7232 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 7233 PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7234 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 7235 mptmp[cp] = PETSC_TRUE; 7236 cp++; 7237 ierr = MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]);CHKERRQ(ierr); 7238 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 7239 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 7240 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr); 7241 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 7242 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 7243 mp[cp]->product->api_user = product->api_user; 7244 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 7245 PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7246 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 7247 rmapt[cp] = 2; 7248 rmapa[cp] = globidx; 7249 cmapt[cp] = 2; 7250 cmapa[cp] = P_oth_idx; 7251 mptmp[cp] = PETSC_FALSE; 7252 cp++; 7253 } 7254 break; 7255 default: 7256 SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 7257 } 7258 /* sanity check */ 7259 if (size > 1) for (i = 0; i < cp; i++) PetscCheckFalse(rmapt[i] == 2 && !hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i); 7260 7261 ierr = PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp);CHKERRQ(ierr); 7262 for (i = 0; i < cp; i++) { 7263 mmdata->mp[i] = mp[i]; 7264 mmdata->mptmp[i] = mptmp[i]; 7265 } 7266 mmdata->cp = cp; 7267 C->product->data = mmdata; 7268 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7269 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7270 7271 /* memory type */ 7272 mmdata->mtype = PETSC_MEMTYPE_HOST; 7273 ierr = PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");CHKERRQ(ierr); 7274 ierr = PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"");CHKERRQ(ierr); 7275 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7276 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7277 7278 /* prepare coo coordinates for values insertion */ 7279 7280 /* count total nonzeros of those intermediate seqaij Mats 7281 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7282 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7283 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7284 */ 7285 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7286 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7287 if (mptmp[cp]) continue; 7288 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7289 const PetscInt *rmap = rmapa[cp]; 7290 const PetscInt mr = mp[cp]->rmap->n; 7291 const PetscInt rs = C->rmap->rstart; 7292 
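/* classify the nonzeros of this intermediate product row by row: rows whose global index falls outside this process's ownership range [rs,re) are counted in ncoo_o (their values must be shipped to the owning process), while rows owned here are counted in ncoo_oown */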
const PetscInt re = C->rmap->rend; 7293 const PetscInt *ii = mm->i; 7294 for (i = 0; i < mr; i++) { 7295 const PetscInt gr = rmap[i]; 7296 const PetscInt nz = ii[i+1] - ii[i]; 7297 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7298 else ncoo_oown += nz; /* this row is local */ 7299 } 7300 } else ncoo_d += mm->nz; 7301 } 7302 7303 /* 7304 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7305 7306 ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted to me by other procs. 7307 7308 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly for own[0]. 7309 7310 off[p]: points to the segment for matrix mp[p], storing the locations of nonzeros that mp[p] will insert on other processes 7311 own[p]: points to the segment for matrix mp[p], storing the locations of nonzeros that mp[p] will insert locally 7312 so off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7313 7314 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7315 E.g., coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive. 7316 */ 7317 ierr = PetscCalloc1(mmdata->cp+1,&mmdata->off);CHKERRQ(ierr); /* +1 to make a CSR-like data structure */ 7318 ierr = PetscCalloc1(mmdata->cp+1,&mmdata->own);CHKERRQ(ierr); 7319 7320 /* gather (i,j) of nonzeros inserted by remote procs */ 7321 if (hasoffproc) { 7322 PetscSF msf; 7323 PetscInt ncoo2,*coo_i2,*coo_j2; 7324 7325 ierr = PetscMalloc1(ncoo_o,&mmdata->off[0]);CHKERRQ(ierr); 7326 ierr = PetscMalloc1(ncoo_oown,&mmdata->own[0]);CHKERRQ(ierr); 7327 ierr = PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j);CHKERRQ(ierr); /* to collect (i,j) of entries to be sent to others */ 7328 7329 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7330 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7331 PetscInt *idxoff = mmdata->off[cp]; 7332 PetscInt *idxown = mmdata->own[cp]; 7333 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7334 const PetscInt *rmap = rmapa[cp]; 7335 const PetscInt *cmap = cmapa[cp]; 7336 const PetscInt *ii = mm->i; 7337 PetscInt *coi = coo_i + ncoo_o; 7338 PetscInt *coj = coo_j + ncoo_o; 7339 const PetscInt mr = mp[cp]->rmap->n; 7340 const PetscInt rs = C->rmap->rstart; 7341 const PetscInt re = C->rmap->rend; 7342 const PetscInt cs = C->cmap->rstart; 7343 for (i = 0; i < mr; i++) { 7344 const PetscInt *jj = mm->j + ii[i]; 7345 const PetscInt gr = rmap[i]; 7346 const PetscInt nz = ii[i+1] - ii[i]; 7347 if (gr < rs || gr >= re) { /* this is an offproc row */ 7348 for (j = ii[i]; j < ii[i+1]; j++) { 7349 *coi++ = gr; 7350 *idxoff++ = j; 7351 } 7352 if (!cmapt[cp]) { /* already global */ 7353 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7354 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7355 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7356 } else { /* offdiag */ 7357 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7358 } 7359 ncoo_o += nz; 7360 } else { /* this is a local row */ 7361 for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j; 7362 } 7363 } 7364 } 7365 mmdata->off[cp + 1] = idxoff; 7366 mmdata->own[cp + 1] = idxown; 7367 } 7368 7369 ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr); 7370 ierr = PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i);CHKERRQ(ierr); 7371 ierr = PetscSFGetMultiSF(mmdata->sf,&msf);CHKERRQ(ierr); 7372 ierr =
PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL);CHKERRQ(ierr); 7373 ncoo = ncoo_d + ncoo_oown + ncoo2; 7374 ierr = PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2);CHKERRQ(ierr); 7375 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); /* put (i,j) of remote nonzeros at back */ 7376 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 7377 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 7378 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 7379 ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr); 7380 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7381 ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w);CHKERRQ(ierr); 7382 coo_i = coo_i2; 7383 coo_j = coo_j2; 7384 } else { /* no offproc values insertion */ 7385 ncoo = ncoo_d; 7386 ierr = PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j);CHKERRQ(ierr); 7387 7388 ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr); 7389 ierr = PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER);CHKERRQ(ierr); 7390 ierr = PetscSFSetUp(mmdata->sf);CHKERRQ(ierr); 7391 } 7392 mmdata->hasoffproc = hasoffproc; 7393 7394 /* gather (i,j) of nonzeros inserted locally */ 7395 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7396 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7397 PetscInt *coi = coo_i + ncoo_d; 7398 PetscInt *coj = coo_j + ncoo_d; 7399 const PetscInt *jj = mm->j; 7400 const PetscInt *ii = mm->i; 7401 const PetscInt *cmap = cmapa[cp]; 7402 const PetscInt *rmap = rmapa[cp]; 7403 const PetscInt mr = mp[cp]->rmap->n; 7404 const PetscInt rs = C->rmap->rstart; 7405 const PetscInt re = C->rmap->rend; 7406 const PetscInt cs = C->cmap->rstart; 7407 7408 if (mptmp[cp]) continue; 7409 if (rmapt[cp] == 1) { /* consecutive rows */ 7410 /* fill coo_i */ 7411 for (i = 0; i < mr; i++) { 7412 const PetscInt gr = i + rs; 7413 for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr; 7414 } 7415 /* fill coo_j */ 7416 if (!cmapt[cp]) { /* type-0, already global */ 7417 ierr = PetscArraycpy(coj,jj,mm->nz);CHKERRQ(ierr); 7418 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7419 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7420 } else { /* type-2, local to global for sparse columns */ 7421 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7422 } 7423 ncoo_d += mm->nz; 7424 } else if (rmapt[cp] == 2) { /* sparse rows */ 7425 for (i = 0; i < mr; i++) { 7426 const PetscInt *jj = mm->j + ii[i]; 7427 const PetscInt gr = rmap[i]; 7428 const PetscInt nz = ii[i+1] - ii[i]; 7429 if (gr >= rs && gr < re) { /* local rows */ 7430 for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr; 7431 if (!cmapt[cp]) { /* type-0, already global */ 7432 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7433 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7434 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7435 } else { /* type-2, local to global for sparse columns */ 7436 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7437 } 7438 ncoo_d += nz; 7439 } 7440 } 7441 } 7442 } 7443 if (glob) { 7444 ierr = ISRestoreIndices(glob,&globidx);CHKERRQ(ierr); 7445 } 7446 ierr = ISDestroy(&glob);CHKERRQ(ierr); 7447 if (P_oth_l2g) { 7448 ierr = ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 7449 } 7450 ierr = 
ISLocalToGlobalMappingDestroy(&P_oth_l2g);CHKERRQ(ierr); 7451 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7452 ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v);CHKERRQ(ierr); 7453 7454 /* preallocate with COO data */ 7455 ierr = MatSetPreallocationCOO(C,ncoo,coo_i,coo_j);CHKERRQ(ierr); 7456 ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr); 7457 PetscFunctionReturn(0); 7458 } 7459 7460 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7461 { 7462 Mat_Product *product = mat->product; 7463 PetscErrorCode ierr; 7464 #if defined(PETSC_HAVE_DEVICE) 7465 PetscBool match = PETSC_FALSE; 7466 PetscBool usecpu = PETSC_FALSE; 7467 #else 7468 PetscBool match = PETSC_TRUE; 7469 #endif 7470 7471 PetscFunctionBegin; 7472 MatCheckProduct(mat,1); 7473 #if defined(PETSC_HAVE_DEVICE) 7474 if (!product->A->boundtocpu && !product->B->boundtocpu) { 7475 ierr = PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match);CHKERRQ(ierr); 7476 } 7477 if (match) { /* we can always fallback to the CPU if requested */ 7478 switch (product->type) { 7479 case MATPRODUCT_AB: 7480 if (product->api_user) { 7481 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");CHKERRQ(ierr); 7482 ierr = PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7483 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7484 } else { 7485 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr); 7486 ierr = PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7487 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7488 } 7489 break; 7490 case MATPRODUCT_AtB: 7491 if (product->api_user) { 7492 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr); 7493 ierr = PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7494 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7495 } else { 7496 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr); 7497 ierr = PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7498 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7499 } 7500 break; 7501 case MATPRODUCT_PtAP: 7502 if (product->api_user) { 7503 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");CHKERRQ(ierr); 7504 ierr = PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7505 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7506 } else { 7507 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr); 7508 ierr = PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7509 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7510 } 7511 break; 7512 default: 7513 break; 7514 } 7515 match = (PetscBool)!usecpu; 7516 } 7517 #endif 7518 if (match) { 7519 switch (product->type) { 7520 case MATPRODUCT_AB: 7521 case MATPRODUCT_AtB: 7522 case MATPRODUCT_PtAP: 7523 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7524 break; 7525 
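/* any other product type is not handled by this backend: productsymbolic is left unset here so the generic MPIAIJ implementation selected below is used instead */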
default: 7526 break; 7527 } 7528 } 7529 /* fall back to the MPIAIJ ops */ 7530 if (!mat->ops->productsymbolic) { 7531 ierr = MatProductSetFromOptions_MPIAIJ(mat);CHKERRQ(ierr); 7532 } 7533 PetscFunctionReturn(0); 7534 } 7535 7536 /* 7537 Special version for direct calls from Fortran 7538 */ 7539 #include <petsc/private/fortranimpl.h> 7540 7541 /* Change these macros so they can be used in a void function */ 7542 /* Identical to CHKERRV, except it assigns to *_ierr */ 7543 #undef CHKERRQ 7544 #define CHKERRQ(ierr) do { \ 7545 PetscErrorCode ierr_msv_mpiaij = (ierr); \ 7546 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 7547 *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \ 7548 return; \ 7549 } \ 7550 } while (0) 7551 7552 #undef SETERRQ 7553 #define SETERRQ(comm,ierr,...) do { \ 7554 *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \ 7555 return; \ 7556 } while (0) 7557 7558 #if defined(PETSC_HAVE_FORTRAN_CAPS) 7559 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 7560 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 7561 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 7562 #else 7563 #endif 7564 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 7565 { 7566 Mat mat = *mmat; 7567 PetscInt m = *mm, n = *mn; 7568 InsertMode addv = *maddv; 7569 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 7570 PetscScalar value; 7571 PetscErrorCode ierr; 7572 7573 MatCheckPreallocated(mat,1); 7574 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 7575 else PetscCheckFalse(mat->insertmode != addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 7576 { 7577 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 7578 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 7579 PetscBool roworiented = aij->roworiented; 7580 7581 /* Some variables required in the macros below */ 7582 Mat A = aij->A; 7583 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 7584 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 7585 MatScalar *aa; 7586 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 7587 Mat B = aij->B; 7588 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 7589 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 7590 MatScalar *ba; 7591 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 7592 cannot use "#if defined" inside a macro.
*/ 7593 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 7594 7595 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 7596 PetscInt nonew = a->nonew; 7597 MatScalar *ap1,*ap2; 7598 7599 PetscFunctionBegin; 7600 ierr = MatSeqAIJGetArray(A,&aa);CHKERRQ(ierr); 7601 ierr = MatSeqAIJGetArray(B,&ba);CHKERRQ(ierr); 7602 for (i=0; i<m; i++) { 7603 if (im[i] < 0) continue; 7604 PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 7605 if (im[i] >= rstart && im[i] < rend) { 7606 row = im[i] - rstart; 7607 lastcol1 = -1; 7608 rp1 = aj + ai[row]; 7609 ap1 = aa + ai[row]; 7610 rmax1 = aimax[row]; 7611 nrow1 = ailen[row]; 7612 low1 = 0; 7613 high1 = nrow1; 7614 lastcol2 = -1; 7615 rp2 = bj + bi[row]; 7616 ap2 = ba + bi[row]; 7617 rmax2 = bimax[row]; 7618 nrow2 = bilen[row]; 7619 low2 = 0; 7620 high2 = nrow2; 7621 7622 for (j=0; j<n; j++) { 7623 if (roworiented) value = v[i*n+j]; 7624 else value = v[i+j*m]; 7625 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 7626 if (in[j] >= cstart && in[j] < cend) { 7627 col = in[j] - cstart; 7628 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 7629 } else if (in[j] < 0) continue; 7630 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 7631 /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */ 7632 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 7633 } else { 7634 if (mat->was_assembled) { 7635 if (!aij->colmap) { 7636 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 7637 } 7638 #if defined(PETSC_USE_CTABLE) 7639 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 7640 col--; 7641 #else 7642 col = aij->colmap[in[j]] - 1; 7643 #endif 7644 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 7645 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 7646 col = in[j]; 7647 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 7648 B = aij->B; 7649 b = (Mat_SeqAIJ*)B->data; 7650 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 7651 rp2 = bj + bi[row]; 7652 ap2 = ba + bi[row]; 7653 rmax2 = bimax[row]; 7654 nrow2 = bilen[row]; 7655 low2 = 0; 7656 high2 = nrow2; 7657 bm = aij->B->rmap->n; 7658 ba = b->a; 7659 inserted = PETSC_FALSE; 7660 } 7661 } else col = in[j]; 7662 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 7663 } 7664 } 7665 } else if (!aij->donotstash) { 7666 if (roworiented) { 7667 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 7668 } else { 7669 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 7670 } 7671 } 7672 } 7673 ierr = MatSeqAIJRestoreArray(A,&aa);CHKERRQ(ierr); 7674 ierr = MatSeqAIJRestoreArray(B,&ba);CHKERRQ(ierr); 7675 } 7676 PetscFunctionReturnVoid(); 7677 } 7678 /* Undefining these here since they were redefined from their original definition above! No 7679 * other PETSc functions should be defined past this point, as it is impossible to recover the 7680 * original definitions */ 7681 #undef CHKERRQ 7682 #undef SETERRQ 7683
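/*
   Illustrative usage sketch (kept as a comment; it is not compiled as part of this file).
   The backend product routines above are reached through the regular MatProduct API.
   Assuming A and P are assembled GPU-capable matrices (e.g. MATMPIAIJCUSPARSE or MATMPIAIJKOKKOS),
   a PtAP product that exercises MatProductSymbolic_MPIAIJBACKEND() and MatProductNumeric_MPIAIJBACKEND()
   could look like the following (error checking omitted for brevity):

       Mat C;
       MatProductCreate(A,P,NULL,&C);
       MatProductSetType(C,MATPRODUCT_PtAP);
       MatProductSetFromOptions(C);   <- installs the backend symbolic op unless a *_backend_cpu option is set
       MatProductSymbolic(C);         <- builds the intermediate products and the COO preallocation of C
       MatProductNumeric(C);          <- computes the values; may be called again after A or P change numerically
       MatDestroy(&C);

   The user-level MatPtAP(), MatMatMult() and MatTransposeMatMult() interfaces follow the same path and
   expose the -matptap_backend_cpu, -matmatmult_backend_cpu and -mattransposematmult_backend_cpu options
   handled in MatProductSetFromOptions_MPIAIJBACKEND() above.
*/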