1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 /*MC 10 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 11 12 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 13 and MATMPIAIJ otherwise. As a result, for single process communicators, 14 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation() is supported 15 for communicators controlling multiple processes. It is recommended that you call both of 16 the above preallocation routines for simplicity. 17 18 Options Database Keys: 19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 20 21 Developer Notes: 22 Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 23 enough exist. 24 25 Level: beginner 26 27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 28 M*/ 29 30 /*MC 31 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 32 33 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 34 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 35 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 36 for communicators controlling multiple processes. It is recommended that you call both of 37 the above preallocation routines for simplicity. 38 39 Options Database Keys: 40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 41 42 Level: beginner 43 44 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 45 M*/ 46 47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg) 48 { 49 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 50 PetscErrorCode ierr; 51 52 PetscFunctionBegin; 53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL) 54 A->boundtocpu = flg; 55 #endif 56 if (a->A) { 57 ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr); 58 } 59 if (a->B) { 60 ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr); 61 } 62 63 /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products. 64 * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors 65 * to differ from the parent matrix. 
*/ 66 if (a->lvec) { 67 ierr = VecBindToCPU(a->lvec,flg);CHKERRQ(ierr); 68 } 69 if (a->diag) { 70 ierr = VecBindToCPU(a->diag,flg);CHKERRQ(ierr); 71 } 72 73 PetscFunctionReturn(0); 74 } 75 76 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 77 { 78 PetscErrorCode ierr; 79 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 80 81 PetscFunctionBegin; 82 if (mat->A) { 83 ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr); 84 ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr); 85 } 86 PetscFunctionReturn(0); 87 } 88 89 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 90 { 91 PetscErrorCode ierr; 92 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 93 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 94 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 95 const PetscInt *ia,*ib; 96 const MatScalar *aa,*bb,*aav,*bav; 97 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 98 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 99 100 PetscFunctionBegin; 101 *keptrows = NULL; 102 103 ia = a->i; 104 ib = b->i; 105 ierr = MatSeqAIJGetArrayRead(mat->A,&aav);CHKERRQ(ierr); 106 ierr = MatSeqAIJGetArrayRead(mat->B,&bav);CHKERRQ(ierr); 107 for (i=0; i<m; i++) { 108 na = ia[i+1] - ia[i]; 109 nb = ib[i+1] - ib[i]; 110 if (!na && !nb) { 111 cnt++; 112 goto ok1; 113 } 114 aa = aav + ia[i]; 115 for (j=0; j<na; j++) { 116 if (aa[j] != 0.0) goto ok1; 117 } 118 bb = bav + ib[i]; 119 for (j=0; j <nb; j++) { 120 if (bb[j] != 0.0) goto ok1; 121 } 122 cnt++; 123 ok1:; 124 } 125 ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRMPI(ierr); 126 if (!n0rows) { 127 ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr); 128 ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 132 cnt = 0; 133 for (i=0; i<m; i++) { 134 na = ia[i+1] - ia[i]; 135 nb = ib[i+1] - ib[i]; 136 if (!na && !nb) continue; 137 aa = aav + ia[i]; 138 for (j=0; j<na;j++) { 139 if (aa[j] != 0.0) { 140 rows[cnt++] = rstart + i; 141 goto ok2; 142 } 143 } 144 bb = bav + ib[i]; 145 for (j=0; j<nb; j++) { 146 if (bb[j] != 0.0) { 147 rows[cnt++] = rstart + i; 148 goto ok2; 149 } 150 } 151 ok2:; 152 } 153 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 154 ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr); 155 ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr); 156 PetscFunctionReturn(0); 157 } 158 159 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 160 { 161 PetscErrorCode ierr; 162 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 163 PetscBool cong; 164 165 PetscFunctionBegin; 166 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 167 if (Y->assembled && cong) { 168 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 169 } else { 170 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 171 } 172 PetscFunctionReturn(0); 173 } 174 175 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 176 { 177 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 178 PetscErrorCode ierr; 179 PetscInt i,rstart,nrows,*rows; 180 181 PetscFunctionBegin; 182 *zrows = NULL; 183 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 184 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 185 for (i=0; i<nrows; i++) rows[i] += rstart; 186 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 187 PetscFunctionReturn(0); 188 } 189 190 PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat 
A,PetscInt type,PetscReal *reductions) 191 { 192 PetscErrorCode ierr; 193 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 194 PetscInt i,m,n,*garray = aij->garray; 195 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 196 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 197 PetscReal *work; 198 const PetscScalar *dummy; 199 200 PetscFunctionBegin; 201 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 202 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 203 ierr = MatSeqAIJGetArrayRead(aij->A,&dummy);CHKERRQ(ierr); 204 ierr = MatSeqAIJRestoreArrayRead(aij->A,&dummy);CHKERRQ(ierr); 205 ierr = MatSeqAIJGetArrayRead(aij->B,&dummy);CHKERRQ(ierr); 206 ierr = MatSeqAIJRestoreArrayRead(aij->B,&dummy);CHKERRQ(ierr); 207 if (type == NORM_2) { 208 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 209 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 210 } 211 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 212 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 213 } 214 } else if (type == NORM_1) { 215 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 216 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 217 } 218 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 219 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 220 } 221 } else if (type == NORM_INFINITY) { 222 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 223 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 224 } 225 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 226 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 227 } 228 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 229 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 230 work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 231 } 232 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 233 work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 234 } 235 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 236 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 237 work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 238 } 239 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 240 work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 241 } 242 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type"); 243 if (type == NORM_INFINITY) { 244 ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 245 } else { 246 ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 247 } 248 ierr = PetscFree(work);CHKERRQ(ierr); 249 if (type == NORM_2) { 250 for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 251 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 252 for (i=0; i<n; i++) reductions[i] /= m; 253 } 254 PetscFunctionReturn(0); 255 } 256 257 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 258 { 259 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 260 IS sis,gis; 261 PetscErrorCode ierr; 262 const PetscInt *isis,*igis; 263 PetscInt n,*iis,nsis,ngis,rstart,i; 264 265 PetscFunctionBegin; 266 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 267 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 268 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 269 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 270 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 271 ierr = 
ISGetIndices(gis,&igis);CHKERRQ(ierr); 272 273 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 274 ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr); 275 ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr); 276 n = ngis + nsis; 277 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 278 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 279 for (i=0; i<n; i++) iis[i] += rstart; 280 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 281 282 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 283 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 284 ierr = ISDestroy(&sis);CHKERRQ(ierr); 285 ierr = ISDestroy(&gis);CHKERRQ(ierr); 286 PetscFunctionReturn(0); 287 } 288 289 /* 290 Local utility routine that creates a mapping from the global column 291 number to the local number in the off-diagonal part of the local 292 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 293 a slightly higher hash table cost; without it it is not scalable (each processor 294 has an order N integer array but is fast to access. 295 */ 296 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 297 { 298 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 299 PetscErrorCode ierr; 300 PetscInt n = aij->B->cmap->n,i; 301 302 PetscFunctionBegin; 303 if (n && !aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 304 #if defined(PETSC_USE_CTABLE) 305 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 306 for (i=0; i<n; i++) { 307 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 308 } 309 #else 310 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 311 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 312 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 313 #endif 314 PetscFunctionReturn(0); 315 } 316 317 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 318 { \ 319 if (col <= lastcol1) low1 = 0; \ 320 else high1 = nrow1; \ 321 lastcol1 = col;\ 322 while (high1-low1 > 5) { \ 323 t = (low1+high1)/2; \ 324 if (rp1[t] > col) high1 = t; \ 325 else low1 = t; \ 326 } \ 327 for (_i=low1; _i<high1; _i++) { \ 328 if (rp1[_i] > col) break; \ 329 if (rp1[_i] == col) { \ 330 if (addv == ADD_VALUES) { \ 331 ap1[_i] += value; \ 332 /* Not sure LogFlops will slow dow the code or not */ \ 333 (void)PetscLogFlops(1.0); \ 334 } \ 335 else ap1[_i] = value; \ 336 inserted = PETSC_TRUE; \ 337 goto a_noinsert; \ 338 } \ 339 } \ 340 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 341 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 342 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 343 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 344 N = nrow1++ - 1; a->nz++; high1++; \ 345 /* shift up all the later entries in this row */ \ 346 ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\ 347 ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\ 348 rp1[_i] = col; \ 349 ap1[_i] = value; \ 350 A->nonzerostate++;\ 351 a_noinsert: ; \ 352 ailen[row] = nrow1; \ 353 } 354 355 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 356 { \ 357 if (col <= lastcol2) low2 = 0; \ 358 else high2 = nrow2; \ 359 lastcol2 = col; \ 360 while (high2-low2 > 5) { \ 361 t = 
(low2+high2)/2; \ 362 if (rp2[t] > col) high2 = t; \ 363 else low2 = t; \ 364 } \ 365 for (_i=low2; _i<high2; _i++) { \ 366 if (rp2[_i] > col) break; \ 367 if (rp2[_i] == col) { \ 368 if (addv == ADD_VALUES) { \ 369 ap2[_i] += value; \ 370 (void)PetscLogFlops(1.0); \ 371 } \ 372 else ap2[_i] = value; \ 373 inserted = PETSC_TRUE; \ 374 goto b_noinsert; \ 375 } \ 376 } \ 377 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 378 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 379 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 380 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 381 N = nrow2++ - 1; b->nz++; high2++; \ 382 /* shift up all the later entries in this row */ \ 383 ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\ 384 ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\ 385 rp2[_i] = col; \ 386 ap2[_i] = value; \ 387 B->nonzerostate++; \ 388 b_noinsert: ; \ 389 bilen[row] = nrow2; \ 390 } 391 392 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 393 { 394 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 395 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 396 PetscErrorCode ierr; 397 PetscInt l,*garray = mat->garray,diag; 398 399 PetscFunctionBegin; 400 /* code only works for square matrices A */ 401 402 /* find size of row to the left of the diagonal part */ 403 ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr); 404 row = row - diag; 405 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 406 if (garray[b->j[b->i[row]+l]] > diag) break; 407 } 408 ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr); 409 410 /* diagonal part */ 411 ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr); 412 413 /* right of diagonal part */ 414 ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr); 415 #if defined(PETSC_HAVE_DEVICE) 416 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU; 417 #endif 418 PetscFunctionReturn(0); 419 } 420 421 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 422 { 423 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 424 PetscScalar value = 0.0; 425 PetscErrorCode ierr; 426 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 427 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 428 PetscBool roworiented = aij->roworiented; 429 430 /* Some Variables required in the macro */ 431 Mat A = aij->A; 432 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 433 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 434 PetscBool ignorezeroentries = a->ignorezeroentries; 435 Mat B = aij->B; 436 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 437 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 438 MatScalar *aa,*ba; 439 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 440 * cannot use "#if defined" inside a macro. 
*/ 441 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 442 443 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 444 PetscInt nonew; 445 MatScalar *ap1,*ap2; 446 447 PetscFunctionBegin; 448 #if defined(PETSC_HAVE_DEVICE) 449 if (A->offloadmask == PETSC_OFFLOAD_GPU) { 450 const PetscScalar *dummy; 451 ierr = MatSeqAIJGetArrayRead(A,&dummy);CHKERRQ(ierr); 452 ierr = MatSeqAIJRestoreArrayRead(A,&dummy);CHKERRQ(ierr); 453 } 454 if (B->offloadmask == PETSC_OFFLOAD_GPU) { 455 const PetscScalar *dummy; 456 ierr = MatSeqAIJGetArrayRead(B,&dummy);CHKERRQ(ierr); 457 ierr = MatSeqAIJRestoreArrayRead(B,&dummy);CHKERRQ(ierr); 458 } 459 #endif 460 aa = a->a; 461 ba = b->a; 462 for (i=0; i<m; i++) { 463 if (im[i] < 0) continue; 464 if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 465 if (im[i] >= rstart && im[i] < rend) { 466 row = im[i] - rstart; 467 lastcol1 = -1; 468 rp1 = aj + ai[row]; 469 ap1 = aa + ai[row]; 470 rmax1 = aimax[row]; 471 nrow1 = ailen[row]; 472 low1 = 0; 473 high1 = nrow1; 474 lastcol2 = -1; 475 rp2 = bj + bi[row]; 476 ap2 = ba + bi[row]; 477 rmax2 = bimax[row]; 478 nrow2 = bilen[row]; 479 low2 = 0; 480 high2 = nrow2; 481 482 for (j=0; j<n; j++) { 483 if (v) value = roworiented ? v[i*n+j] : v[i+j*m]; 484 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 485 if (in[j] >= cstart && in[j] < cend) { 486 col = in[j] - cstart; 487 nonew = a->nonew; 488 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 489 #if defined(PETSC_HAVE_DEVICE) 490 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 491 #endif 492 } else if (in[j] < 0) continue; 493 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 494 else { 495 if (mat->was_assembled) { 496 if (!aij->colmap) { 497 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 498 } 499 #if defined(PETSC_USE_CTABLE) 500 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 501 col--; 502 #else 503 col = aij->colmap[in[j]] - 1; 504 #endif 505 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 506 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 507 col = in[j]; 508 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 509 B = aij->B; 510 b = (Mat_SeqAIJ*)B->data; 511 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 512 rp2 = bj + bi[row]; 513 ap2 = ba + bi[row]; 514 rmax2 = bimax[row]; 515 nrow2 = bilen[row]; 516 low2 = 0; 517 high2 = nrow2; 518 bm = aij->B->rmap->n; 519 ba = b->a; 520 inserted = PETSC_FALSE; 521 } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 522 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 523 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 524 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 525 } 526 } else col = in[j]; 527 nonew = b->nonew; 528 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 529 #if defined(PETSC_HAVE_DEVICE) 530 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 531 #endif 532 } 533 } 534 } else { 535 if (mat->nooffprocentries) 
SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 536 if (!aij->donotstash) { 537 mat->assembled = PETSC_FALSE; 538 if (roworiented) { 539 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 540 } else { 541 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 542 } 543 } 544 } 545 } 546 PetscFunctionReturn(0); 547 } 548 549 /* 550 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 551 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 552 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 553 */ 554 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[]) 555 { 556 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 557 Mat A = aij->A; /* diagonal part of the matrix */ 558 Mat B = aij->B; /* offdiagonal part of the matrix */ 559 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 560 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 561 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col; 562 PetscInt *ailen = a->ilen,*aj = a->j; 563 PetscInt *bilen = b->ilen,*bj = b->j; 564 PetscInt am = aij->A->rmap->n,j; 565 PetscInt diag_so_far = 0,dnz; 566 PetscInt offd_so_far = 0,onz; 567 568 PetscFunctionBegin; 569 /* Iterate over all rows of the matrix */ 570 for (j=0; j<am; j++) { 571 dnz = onz = 0; 572 /* Iterate over all non-zero columns of the current row */ 573 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 574 /* If column is in the diagonal */ 575 if (mat_j[col] >= cstart && mat_j[col] < cend) { 576 aj[diag_so_far++] = mat_j[col] - cstart; 577 dnz++; 578 } else { /* off-diagonal entries */ 579 bj[offd_so_far++] = mat_j[col]; 580 onz++; 581 } 582 } 583 ailen[j] = dnz; 584 bilen[j] = onz; 585 } 586 PetscFunctionReturn(0); 587 } 588 589 /* 590 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 591 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 592 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 593 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 594 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 595 */ 596 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[]) 597 { 598 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 599 Mat A = aij->A; /* diagonal part of the matrix */ 600 Mat B = aij->B; /* offdiagonal part of the matrix */ 601 Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data; 602 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 603 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 604 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 605 PetscInt *ailen = a->ilen,*aj = a->j; 606 PetscInt *bilen = b->ilen,*bj = b->j; 607 PetscInt am = aij->A->rmap->n,j; 608 PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. 
*/ 609 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 610 PetscScalar *aa = a->a,*ba = b->a; 611 612 PetscFunctionBegin; 613 /* Iterate over all rows of the matrix */ 614 for (j=0; j<am; j++) { 615 dnz_row = onz_row = 0; 616 rowstart_offd = full_offd_i[j]; 617 rowstart_diag = full_diag_i[j]; 618 /* Iterate over all non-zero columns of the current row */ 619 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 620 /* If column is in the diagonal */ 621 if (mat_j[col] >= cstart && mat_j[col] < cend) { 622 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 623 aa[rowstart_diag+dnz_row] = mat_a[col]; 624 dnz_row++; 625 } else { /* off-diagonal entries */ 626 bj[rowstart_offd+onz_row] = mat_j[col]; 627 ba[rowstart_offd+onz_row] = mat_a[col]; 628 onz_row++; 629 } 630 } 631 ailen[j] = dnz_row; 632 bilen[j] = onz_row; 633 } 634 PetscFunctionReturn(0); 635 } 636 637 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 638 { 639 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 640 PetscErrorCode ierr; 641 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 642 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 643 644 PetscFunctionBegin; 645 for (i=0; i<m; i++) { 646 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 647 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 648 if (idxm[i] >= rstart && idxm[i] < rend) { 649 row = idxm[i] - rstart; 650 for (j=0; j<n; j++) { 651 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 652 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 653 if (idxn[j] >= cstart && idxn[j] < cend) { 654 col = idxn[j] - cstart; 655 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 656 } else { 657 if (!aij->colmap) { 658 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 659 } 660 #if defined(PETSC_USE_CTABLE) 661 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 662 col--; 663 #else 664 col = aij->colmap[idxn[j]] - 1; 665 #endif 666 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 667 else { 668 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 669 } 670 } 671 } 672 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 673 } 674 PetscFunctionReturn(0); 675 } 676 677 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 678 { 679 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 680 PetscErrorCode ierr; 681 PetscInt nstash,reallocs; 682 683 PetscFunctionBegin; 684 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 685 686 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 687 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 688 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 689 PetscFunctionReturn(0); 690 } 691 692 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 693 { 694 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 695 PetscErrorCode ierr; 696 PetscMPIInt n; 697 PetscInt i,j,rstart,ncols,flg; 698 PetscInt *row,*col; 699 PetscBool other_disassembled; 700 PetscScalar *val; 701 702 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be 
reset in disassembly */ 703 704 PetscFunctionBegin; 705 if (!aij->donotstash && !mat->nooffprocentries) { 706 while (1) { 707 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 708 if (!flg) break; 709 710 for (i=0; i<n;) { 711 /* Now identify the consecutive vals belonging to the same row */ 712 for (j=i,rstart=row[j]; j<n; j++) { 713 if (row[j] != rstart) break; 714 } 715 if (j < n) ncols = j-i; 716 else ncols = n-i; 717 /* Now assemble all these values with a single function call */ 718 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 719 i = j; 720 } 721 } 722 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 723 } 724 #if defined(PETSC_HAVE_DEVICE) 725 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 726 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 727 if (mat->boundtocpu) { 728 ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr); 729 ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr); 730 } 731 #endif 732 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 733 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 734 735 /* determine if any processor has disassembled, if so we must 736 also disassemble ourself, in order that we may reassemble. */ 737 /* 738 if nonzero structure of submatrix B cannot change then we know that 739 no processor disassembled thus we can skip this stuff 740 */ 741 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 742 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 743 if (mat->was_assembled && !other_disassembled) { 744 #if defined(PETSC_HAVE_DEVICE) 745 aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */ 746 #endif 747 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 748 } 749 } 750 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 751 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 752 } 753 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 754 #if defined(PETSC_HAVE_DEVICE) 755 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 756 #endif 757 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 758 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 759 760 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 761 762 aij->rowvalues = NULL; 763 764 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 765 766 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 767 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 768 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 769 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 770 } 771 #if defined(PETSC_HAVE_DEVICE) 772 mat->offloadmask = PETSC_OFFLOAD_BOTH; 773 #endif 774 PetscFunctionReturn(0); 775 } 776 777 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 778 { 779 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 780 PetscErrorCode ierr; 781 782 PetscFunctionBegin; 783 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 784 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 785 PetscFunctionReturn(0); 786 } 787 788 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const 
PetscInt rows[],PetscScalar diag,Vec x,Vec b) 789 { 790 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 791 PetscObjectState sA, sB; 792 PetscInt *lrows; 793 PetscInt r, len; 794 PetscBool cong, lch, gch; 795 PetscErrorCode ierr; 796 797 PetscFunctionBegin; 798 /* get locally owned rows */ 799 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 800 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 801 /* fix right hand side if needed */ 802 if (x && b) { 803 const PetscScalar *xx; 804 PetscScalar *bb; 805 806 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 807 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 808 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 809 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 810 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 811 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 812 } 813 814 sA = mat->A->nonzerostate; 815 sB = mat->B->nonzerostate; 816 817 if (diag != 0.0 && cong) { 818 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 819 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 820 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 821 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 822 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 823 PetscInt nnwA, nnwB; 824 PetscBool nnzA, nnzB; 825 826 nnwA = aijA->nonew; 827 nnwB = aijB->nonew; 828 nnzA = aijA->keepnonzeropattern; 829 nnzB = aijB->keepnonzeropattern; 830 if (!nnzA) { 831 ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr); 832 aijA->nonew = 0; 833 } 834 if (!nnzB) { 835 ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr); 836 aijB->nonew = 0; 837 } 838 /* Must zero here before the next loop */ 839 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 840 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 841 for (r = 0; r < len; ++r) { 842 const PetscInt row = lrows[r] + A->rmap->rstart; 843 if (row >= A->cmap->N) continue; 844 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 845 } 846 aijA->nonew = nnwA; 847 aijB->nonew = nnwB; 848 } else { 849 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 850 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 851 } 852 ierr = PetscFree(lrows);CHKERRQ(ierr); 853 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 854 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 855 856 /* reduce nonzerostate */ 857 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 858 ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 859 if (gch) A->nonzerostate++; 860 PetscFunctionReturn(0); 861 } 862 863 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 864 { 865 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 866 PetscErrorCode ierr; 867 PetscMPIInt n = A->rmap->n; 868 PetscInt i,j,r,m,len = 0; 869 PetscInt *lrows,*owners = A->rmap->range; 870 PetscMPIInt p = 0; 871 PetscSFNode *rrows; 872 PetscSF sf; 873 const PetscScalar *xx; 874 PetscScalar *bb,*mask; 875 Vec xmask,lmask; 876 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 877 const PetscInt 
*aj, *ii,*ridx; 878 PetscScalar *aa; 879 880 PetscFunctionBegin; 881 /* Create SF where leaves are input rows and roots are owned rows */ 882 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 883 for (r = 0; r < n; ++r) lrows[r] = -1; 884 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 885 for (r = 0; r < N; ++r) { 886 const PetscInt idx = rows[r]; 887 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 888 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 889 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 890 } 891 rrows[r].rank = p; 892 rrows[r].index = rows[r] - owners[p]; 893 } 894 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 895 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 896 /* Collect flags for rows to be zeroed */ 897 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 898 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 899 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 900 /* Compress and put in row numbers */ 901 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 902 /* zero diagonal part of matrix */ 903 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 904 /* handle off diagonal part of matrix */ 905 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 906 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 907 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 908 for (i=0; i<len; i++) bb[lrows[i]] = 1; 909 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 910 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 911 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 912 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 913 if (x && b) { /* this code is buggy when the row and column layout don't match */ 914 PetscBool cong; 915 916 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 917 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 918 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 919 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 920 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 921 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 922 } 923 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 924 /* remove zeroed rows of off diagonal matrix */ 925 ii = aij->i; 926 for (i=0; i<len; i++) { 927 ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr); 928 } 929 /* loop over all elements of off process part of matrix zeroing removed columns*/ 930 if (aij->compressedrow.use) { 931 m = aij->compressedrow.nrows; 932 ii = aij->compressedrow.i; 933 ridx = aij->compressedrow.rindex; 934 for (i=0; i<m; i++) { 935 n = ii[i+1] - ii[i]; 936 aj = aij->j + ii[i]; 937 aa = aij->a + ii[i]; 938 939 for (j=0; j<n; j++) { 940 if (PetscAbsScalar(mask[*aj])) { 941 if (b) bb[*ridx] -= *aa*xx[*aj]; 942 *aa = 0.0; 943 } 944 aa++; 945 aj++; 946 } 947 ridx++; 948 } 949 } else { /* do not use compressed row format */ 950 m = l->B->rmap->n; 951 for (i=0; i<m; i++) { 952 n = ii[i+1] - ii[i]; 953 aj = aij->j + ii[i]; 954 aa = aij->a + ii[i]; 955 for (j=0; j<n; j++) { 956 if (PetscAbsScalar(mask[*aj])) { 957 if (b) bb[i] -= *aa*xx[*aj]; 958 *aa = 0.0; 959 } 960 aa++; 961 aj++; 962 } 963 
} 964 } 965 if (x && b) { 966 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 967 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 968 } 969 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 970 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 971 ierr = PetscFree(lrows);CHKERRQ(ierr); 972 973 /* only change matrix nonzero state if pattern was allowed to be changed */ 974 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 975 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 976 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 977 } 978 PetscFunctionReturn(0); 979 } 980 981 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 982 { 983 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 984 PetscErrorCode ierr; 985 PetscInt nt; 986 VecScatter Mvctx = a->Mvctx; 987 988 PetscFunctionBegin; 989 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 990 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 991 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 992 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 993 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 994 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 995 PetscFunctionReturn(0); 996 } 997 998 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 999 { 1000 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1001 PetscErrorCode ierr; 1002 1003 PetscFunctionBegin; 1004 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1005 PetscFunctionReturn(0); 1006 } 1007 1008 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1009 { 1010 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1011 PetscErrorCode ierr; 1012 VecScatter Mvctx = a->Mvctx; 1013 1014 PetscFunctionBegin; 1015 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1016 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1017 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1018 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1019 PetscFunctionReturn(0); 1020 } 1021 1022 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1023 { 1024 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1025 PetscErrorCode ierr; 1026 1027 PetscFunctionBegin; 1028 /* do nondiagonal part */ 1029 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1030 /* do local part */ 1031 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1032 /* add partial results together */ 1033 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1034 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1035 PetscFunctionReturn(0); 1036 } 1037 1038 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1039 { 1040 MPI_Comm comm; 1041 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1042 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1043 IS Me,Notme; 1044 PetscErrorCode ierr; 1045 PetscInt M,N,first,last,*notme,i; 1046 PetscBool lf; 1047 PetscMPIInt size; 1048 1049 PetscFunctionBegin; 1050 /* Easy test: symmetric diagonal block */ 1051 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1052 ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 1053 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRMPI(ierr); 1054 if (!*f) PetscFunctionReturn(0); 1055 ierr = 
PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1056 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 1057 if (size == 1) PetscFunctionReturn(0); 1058 1059 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1060 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1061 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1062 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1063 for (i=0; i<first; i++) notme[i] = i; 1064 for (i=last; i<M; i++) notme[i-last+first] = i; 1065 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1066 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1067 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1068 Aoff = Aoffs[0]; 1069 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1070 Boff = Boffs[0]; 1071 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1072 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1073 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1074 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1075 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1076 ierr = PetscFree(notme);CHKERRQ(ierr); 1077 PetscFunctionReturn(0); 1078 } 1079 1080 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1081 { 1082 PetscErrorCode ierr; 1083 1084 PetscFunctionBegin; 1085 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1086 PetscFunctionReturn(0); 1087 } 1088 1089 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1090 { 1091 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1092 PetscErrorCode ierr; 1093 1094 PetscFunctionBegin; 1095 /* do nondiagonal part */ 1096 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1097 /* do local part */ 1098 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1099 /* add partial results together */ 1100 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1101 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1102 PetscFunctionReturn(0); 1103 } 1104 1105 /* 1106 This only works correctly for square matrices where the subblock A->A is the 1107 diagonal block 1108 */ 1109 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1110 { 1111 PetscErrorCode ierr; 1112 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1113 1114 PetscFunctionBegin; 1115 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1116 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1117 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1118 PetscFunctionReturn(0); 1119 } 1120 1121 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1122 { 1123 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1124 PetscErrorCode ierr; 1125 1126 PetscFunctionBegin; 1127 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1128 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1129 PetscFunctionReturn(0); 1130 } 1131 1132 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1133 { 1134 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1135 PetscErrorCode ierr; 1136 1137 PetscFunctionBegin; 1138 #if defined(PETSC_USE_LOG) 1139 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1140 #endif 1141 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1142 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1143 ierr = 
MatDestroy(&aij->A);CHKERRQ(ierr); 1144 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1145 #if defined(PETSC_USE_CTABLE) 1146 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1147 #else 1148 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1149 #endif 1150 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1151 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1152 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1153 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1154 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1155 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1156 1157 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 1158 ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr); 1159 1160 ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr); 1161 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1162 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1163 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1164 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1165 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1166 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1167 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1168 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr); 1169 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1170 #if defined(PETSC_HAVE_CUDA) 1171 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr); 1172 #endif 1173 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 1174 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr); 1175 #endif 1176 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 1177 #if defined(PETSC_HAVE_ELEMENTAL) 1178 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1179 #endif 1180 #if defined(PETSC_HAVE_SCALAPACK) 1181 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr); 1182 #endif 1183 #if defined(PETSC_HAVE_HYPRE) 1184 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1185 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1186 #endif 1187 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1188 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr); 1189 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1190 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr); 1191 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr); 1192 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr); 1193 #if defined(PETSC_HAVE_MKL_SPARSE) 1194 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr); 1195 #endif 1196 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr); 1197 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1198 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr); 1199 PetscFunctionReturn(0); 1200 } 1201 1202 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1203 { 1204 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1205 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1206 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1207 const PetscInt *garray = aij->garray; 1208 const PetscScalar *aa,*ba; 1209 PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb; 1210 PetscInt *rowlens; 1211 PetscInt *colidxs; 1212 PetscScalar *matvals; 1213 PetscErrorCode ierr; 1214 1215 PetscFunctionBegin; 1216 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1217 1218 M = mat->rmap->N; 1219 N = mat->cmap->N; 1220 m = mat->rmap->n; 1221 rs = mat->rmap->rstart; 1222 cs = mat->cmap->rstart; 1223 nz = A->nz + B->nz; 1224 1225 /* write matrix header */ 1226 header[0] = MAT_FILE_CLASSID; 1227 header[1] = M; header[2] = N; header[3] = nz; 1228 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1229 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1230 1231 /* fill in and store row lengths */ 1232 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1233 for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1234 ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr); 1235 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1236 1237 /* fill in and store column indices */ 1238 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1239 for (cnt=0, i=0; i<m; i++) { 1240 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1241 if (garray[B->j[jb]] > cs) break; 1242 colidxs[cnt++] = garray[B->j[jb]]; 1243 } 1244 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1245 colidxs[cnt++] = A->j[ja] + cs; 1246 for (; jb<B->i[i+1]; jb++) 1247 colidxs[cnt++] = garray[B->j[jb]]; 1248 } 1249 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1250 ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 1251 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1252 1253 /* fill in and store nonzero values */ 1254 ierr = MatSeqAIJGetArrayRead(aij->A,&aa);CHKERRQ(ierr); 1255 ierr = MatSeqAIJGetArrayRead(aij->B,&ba);CHKERRQ(ierr); 1256 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1257 for (cnt=0, i=0; i<m; i++) { 1258 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1259 if (garray[B->j[jb]] > cs) break; 1260 matvals[cnt++] = ba[jb]; 1261 } 1262 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1263 matvals[cnt++] = aa[ja]; 1264 for (; jb<B->i[i+1]; jb++) 1265 matvals[cnt++] = ba[jb]; 1266 } 1267 ierr = MatSeqAIJRestoreArrayRead(aij->A,&aa);CHKERRQ(ierr); 1268 ierr = MatSeqAIJRestoreArrayRead(aij->B,&ba);CHKERRQ(ierr); 1269 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1270 ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 1271 ierr = PetscFree(matvals);CHKERRQ(ierr); 1272 1273 /* write block size option to the viewer's .info file */ 1274 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1275 PetscFunctionReturn(0); 
1276 } 1277 1278 #include <petscdraw.h> 1279 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1280 { 1281 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1282 PetscErrorCode ierr; 1283 PetscMPIInt rank = aij->rank,size = aij->size; 1284 PetscBool isdraw,iascii,isbinary; 1285 PetscViewer sviewer; 1286 PetscViewerFormat format; 1287 1288 PetscFunctionBegin; 1289 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1290 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1291 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1292 if (iascii) { 1293 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1294 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1295 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1296 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1297 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1298 for (i=0; i<(PetscInt)size; i++) { 1299 nmax = PetscMax(nmax,nz[i]); 1300 nmin = PetscMin(nmin,nz[i]); 1301 navg += nz[i]; 1302 } 1303 ierr = PetscFree(nz);CHKERRQ(ierr); 1304 navg = navg/size; 1305 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1306 PetscFunctionReturn(0); 1307 } 1308 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1309 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1310 MatInfo info; 1311 PetscInt *inodes=NULL; 1312 1313 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr); 1314 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1315 ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr); 1316 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1317 if (!inodes) { 1318 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1319 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1320 } else { 1321 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1322 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1323 } 1324 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1325 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1326 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1327 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1328 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1329 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1330 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1331 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1332 PetscFunctionReturn(0); 1333 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1334 PetscInt inodecount,inodelimit,*inodes; 1335 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1336 if (inodes) { 1337 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1338 } else { 1339 ierr = 
PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1340 } 1341 PetscFunctionReturn(0); 1342 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1343 PetscFunctionReturn(0); 1344 } 1345 } else if (isbinary) { 1346 if (size == 1) { 1347 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1348 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1349 } else { 1350 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1351 } 1352 PetscFunctionReturn(0); 1353 } else if (iascii && size == 1) { 1354 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1355 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1356 PetscFunctionReturn(0); 1357 } else if (isdraw) { 1358 PetscDraw draw; 1359 PetscBool isnull; 1360 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1361 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1362 if (isnull) PetscFunctionReturn(0); 1363 } 1364 1365 { /* assemble the entire matrix onto first processor */ 1366 Mat A = NULL, Av; 1367 IS isrow,iscol; 1368 1369 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1370 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1371 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1372 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1373 /* The commented code uses MatCreateSubMatrices instead */ 1374 /* 1375 Mat *AA, A = NULL, Av; 1376 IS isrow,iscol; 1377 1378 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1379 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? 
mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1380 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1381 if (rank == 0) { 1382 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1383 A = AA[0]; 1384 Av = AA[0]; 1385 } 1386 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1387 */ 1388 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1389 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1390 /* 1391 Everyone has to call to draw the matrix since the graphics waits are 1392 synchronized across all processors that share the PetscDraw object 1393 */ 1394 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1395 if (rank == 0) { 1396 if (((PetscObject)mat)->name) { 1397 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1398 } 1399 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1400 } 1401 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1402 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1403 ierr = MatDestroy(&A);CHKERRQ(ierr); 1404 } 1405 PetscFunctionReturn(0); 1406 } 1407 1408 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1409 { 1410 PetscErrorCode ierr; 1411 PetscBool iascii,isdraw,issocket,isbinary; 1412 1413 PetscFunctionBegin; 1414 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1415 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1416 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1417 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1418 if (iascii || isdraw || isbinary || issocket) { 1419 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1420 } 1421 PetscFunctionReturn(0); 1422 } 1423 1424 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1425 { 1426 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1427 PetscErrorCode ierr; 1428 Vec bb1 = NULL; 1429 PetscBool hasop; 1430 1431 PetscFunctionBegin; 1432 if (flag == SOR_APPLY_UPPER) { 1433 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1434 PetscFunctionReturn(0); 1435 } 1436 1437 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1438 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1439 } 1440 1441 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1442 if (flag & SOR_ZERO_INITIAL_GUESS) { 1443 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1444 its--; 1445 } 1446 1447 while (its--) { 1448 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1449 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1450 1451 /* update rhs: bb1 = bb - B*x */ 1452 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1453 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1454 1455 /* local sweep */ 1456 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1457 } 1458 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1459 if (flag & SOR_ZERO_INITIAL_GUESS) { 1460 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1461 its--; 1462 } 1463 while (its--) { 1464 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1465 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1466 1467 /* update rhs: bb1 = bb - B*x */ 1468 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1469 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1470 1471 /* local sweep */ 1472 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1473 } 1474 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1475 if (flag & SOR_ZERO_INITIAL_GUESS) { 1476 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1477 its--; 1478 } 1479 while (its--) { 1480 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1481 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1482 1483 /* update rhs: bb1 = bb - B*x */ 1484 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1485 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1486 1487 /* local sweep */ 1488 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1489 } 1490 } else if (flag & SOR_EISENSTAT) { 1491 Vec xx1; 1492 1493 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1494 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1495 1496 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1497 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1498 if (!mat->diag) { 1499 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1500 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1501 } 1502 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1503 if (hasop) { 1504 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1505 } else { 1506 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1507 } 1508 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1509 1510 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1511 1512 /* local sweep */ 1513 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1514 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1515 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1516 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1517 1518 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1519 1520 matin->factorerrortype = mat->A->factorerrortype; 1521 PetscFunctionReturn(0); 1522 } 1523 1524 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1525 { 1526 Mat aA,aB,Aperm; 1527 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1528 PetscScalar *aa,*ba; 1529 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1530 PetscSF rowsf,sf; 1531 IS parcolp = NULL; 1532 PetscBool done; 1533 PetscErrorCode ierr; 1534 1535 PetscFunctionBegin; 1536 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1537 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1538 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1539 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1540 1541 /* Invert row permutation to find out where my rows should go */ 1542 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1543 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1544 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 
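  /* The star forest built above has one leaf per local row of the permuted matrix, and leaf i points
     at the global row rwant[i] of A that it wants.  Reducing work[] (the global indices of this
     process's own rows, filled in just below) onto those roots leaves, in rdest[i], the global row of
     the permuted matrix to which A's local row i must be sent.  A minimal sketch, assuming a single
     process and rowp = {2,0,1}:

       rwant = {2,0,1}, work = {0,1,2}  ==>  rdest = {1,2,0}

     i.e. A's row 0 becomes row 1 of the permuted matrix, row 1 becomes row 2, and row 2 becomes row 0.
     The same construction is repeated below for the column permutation and for the ghost columns. */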
1545 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1546 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1547 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1548 1549 /* Invert column permutation to find out where my columns should go */ 1550 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1551 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1552 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1553 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1554 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1555 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1556 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1557 1558 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1559 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1560 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1561 1562 /* Find out where my gcols should go */ 1563 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1564 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1565 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1566 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1567 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1568 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr); 1569 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr); 1570 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1571 1572 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1573 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1574 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1575 for (i=0; i<m; i++) { 1576 PetscInt row = rdest[i]; 1577 PetscMPIInt rowner; 1578 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1579 for (j=ai[i]; j<ai[i+1]; j++) { 1580 PetscInt col = cdest[aj[j]]; 1581 PetscMPIInt cowner; 1582 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1583 if (rowner == cowner) dnnz[i]++; 1584 else onnz[i]++; 1585 } 1586 for (j=bi[i]; j<bi[i+1]; j++) { 1587 PetscInt col = gcdest[bj[j]]; 1588 PetscMPIInt cowner; 1589 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1590 if (rowner == cowner) dnnz[i]++; 1591 else onnz[i]++; 1592 } 1593 } 1594 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr); 1595 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr); 1596 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr); 1597 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr); 1598 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1599 1600 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1601 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1602 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1603 for (i=0; i<m; i++) { 1604 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1605 PetscInt j0,rowlen; 1606 rowlen = ai[i+1] - ai[i]; 1607 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1608 for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1609 ierr = 
MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1610 } 1611 rowlen = bi[i+1] - bi[i]; 1612 for (j0=j=0; j<rowlen; j0=j) { 1613 for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1614 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1615 } 1616 } 1617 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1618 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1619 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1620 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1621 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1622 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1623 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1624 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1625 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1626 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1627 *B = Aperm; 1628 PetscFunctionReturn(0); 1629 } 1630 1631 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1632 { 1633 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1634 PetscErrorCode ierr; 1635 1636 PetscFunctionBegin; 1637 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1638 if (ghosts) *ghosts = aij->garray; 1639 PetscFunctionReturn(0); 1640 } 1641 1642 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1643 { 1644 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1645 Mat A = mat->A,B = mat->B; 1646 PetscErrorCode ierr; 1647 PetscLogDouble isend[5],irecv[5]; 1648 1649 PetscFunctionBegin; 1650 info->block_size = 1.0; 1651 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1652 1653 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1654 isend[3] = info->memory; isend[4] = info->mallocs; 1655 1656 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1657 1658 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1659 isend[3] += info->memory; isend[4] += info->mallocs; 1660 if (flag == MAT_LOCAL) { 1661 info->nz_used = isend[0]; 1662 info->nz_allocated = isend[1]; 1663 info->nz_unneeded = isend[2]; 1664 info->memory = isend[3]; 1665 info->mallocs = isend[4]; 1666 } else if (flag == MAT_GLOBAL_MAX) { 1667 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr); 1668 1669 info->nz_used = irecv[0]; 1670 info->nz_allocated = irecv[1]; 1671 info->nz_unneeded = irecv[2]; 1672 info->memory = irecv[3]; 1673 info->mallocs = irecv[4]; 1674 } else if (flag == MAT_GLOBAL_SUM) { 1675 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr); 1676 1677 info->nz_used = irecv[0]; 1678 info->nz_allocated = irecv[1]; 1679 info->nz_unneeded = irecv[2]; 1680 info->memory = irecv[3]; 1681 info->mallocs = irecv[4]; 1682 } 1683 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1684 info->fill_ratio_needed = 0; 1685 info->factor_mallocs = 0; 1686 PetscFunctionReturn(0); 1687 } 1688 1689 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1690 { 1691 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1692 PetscErrorCode ierr; 1693 1694 PetscFunctionBegin; 1695 switch (op) { 1696 case MAT_NEW_NONZERO_LOCATIONS: 1697 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1698 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1699 case MAT_KEEP_NONZERO_PATTERN: 1700 case 
MAT_NEW_NONZERO_LOCATION_ERR: 1701 case MAT_USE_INODES: 1702 case MAT_IGNORE_ZERO_ENTRIES: 1703 case MAT_FORM_EXPLICIT_TRANSPOSE: 1704 MatCheckPreallocated(A,1); 1705 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1706 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1707 break; 1708 case MAT_ROW_ORIENTED: 1709 MatCheckPreallocated(A,1); 1710 a->roworiented = flg; 1711 1712 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1713 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1714 break; 1715 case MAT_FORCE_DIAGONAL_ENTRIES: 1716 case MAT_SORTED_FULL: 1717 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1718 break; 1719 case MAT_IGNORE_OFF_PROC_ENTRIES: 1720 a->donotstash = flg; 1721 break; 1722 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1723 case MAT_SPD: 1724 case MAT_SYMMETRIC: 1725 case MAT_STRUCTURALLY_SYMMETRIC: 1726 case MAT_HERMITIAN: 1727 case MAT_SYMMETRY_ETERNAL: 1728 break; 1729 case MAT_SUBMAT_SINGLEIS: 1730 A->submat_singleis = flg; 1731 break; 1732 case MAT_STRUCTURE_ONLY: 1733 /* The option is handled directly by MatSetOption() */ 1734 break; 1735 default: 1736 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1737 } 1738 PetscFunctionReturn(0); 1739 } 1740 1741 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1742 { 1743 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1744 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1745 PetscErrorCode ierr; 1746 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1747 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1748 PetscInt *cmap,*idx_p; 1749 1750 PetscFunctionBegin; 1751 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1752 mat->getrowactive = PETSC_TRUE; 1753 1754 if (!mat->rowvalues && (idx || v)) { 1755 /* 1756 allocate enough space to hold information from the longest row. 
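        The length of row i is the number of stored entries in the diagonal block plus the number in
        the off-diagonal block, (Aa->i[i+1]-Aa->i[i]) + (Ba->i[i+1]-Ba->i[i]).  For illustration, with
        diagonal-part row lengths {3,1} and off-diagonal-part row lengths {2,1}, the maximum is 5, so
        rowvalues/rowindices are allocated with 5 entries and reused by every later MatGetRow() call
        on this process.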
1757 */ 1758 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1759 PetscInt max = 1,tmp; 1760 for (i=0; i<matin->rmap->n; i++) { 1761 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1762 if (max < tmp) max = tmp; 1763 } 1764 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1765 } 1766 1767 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1768 lrow = row - rstart; 1769 1770 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1771 if (!v) {pvA = NULL; pvB = NULL;} 1772 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1773 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1774 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1775 nztot = nzA + nzB; 1776 1777 cmap = mat->garray; 1778 if (v || idx) { 1779 if (nztot) { 1780 /* Sort by increasing column numbers, assuming A and B already sorted */ 1781 PetscInt imark = -1; 1782 if (v) { 1783 *v = v_p = mat->rowvalues; 1784 for (i=0; i<nzB; i++) { 1785 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1786 else break; 1787 } 1788 imark = i; 1789 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1790 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1791 } 1792 if (idx) { 1793 *idx = idx_p = mat->rowindices; 1794 if (imark > -1) { 1795 for (i=0; i<imark; i++) { 1796 idx_p[i] = cmap[cworkB[i]]; 1797 } 1798 } else { 1799 for (i=0; i<nzB; i++) { 1800 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1801 else break; 1802 } 1803 imark = i; 1804 } 1805 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1806 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1807 } 1808 } else { 1809 if (idx) *idx = NULL; 1810 if (v) *v = NULL; 1811 } 1812 } 1813 *nz = nztot; 1814 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1815 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1816 PetscFunctionReturn(0); 1817 } 1818 1819 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1820 { 1821 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1822 1823 PetscFunctionBegin; 1824 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1825 aij->getrowactive = PETSC_FALSE; 1826 PetscFunctionReturn(0); 1827 } 1828 1829 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1830 { 1831 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1832 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1833 PetscErrorCode ierr; 1834 PetscInt i,j,cstart = mat->cmap->rstart; 1835 PetscReal sum = 0.0; 1836 MatScalar *v; 1837 1838 PetscFunctionBegin; 1839 if (aij->size == 1) { 1840 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1841 } else { 1842 if (type == NORM_FROBENIUS) { 1843 v = amat->a; 1844 for (i=0; i<amat->nz; i++) { 1845 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1846 } 1847 v = bmat->a; 1848 for (i=0; i<bmat->nz; i++) { 1849 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1850 } 1851 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1852 *norm = PetscSqrtReal(*norm); 1853 ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr); 1854 } else if (type == NORM_1) { /* max column norm */ 1855 PetscReal *tmp,*tmp2; 1856 PetscInt *jj,*garray = aij->garray; 1857 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1858 ierr = 
PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1859 *norm = 0.0; 1860 v = amat->a; jj = amat->j; 1861 for (j=0; j<amat->nz; j++) { 1862 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1863 } 1864 v = bmat->a; jj = bmat->j; 1865 for (j=0; j<bmat->nz; j++) { 1866 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1867 } 1868 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1869 for (j=0; j<mat->cmap->N; j++) { 1870 if (tmp2[j] > *norm) *norm = tmp2[j]; 1871 } 1872 ierr = PetscFree(tmp);CHKERRQ(ierr); 1873 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1874 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1875 } else if (type == NORM_INFINITY) { /* max row norm */ 1876 PetscReal ntemp = 0.0; 1877 for (j=0; j<aij->A->rmap->n; j++) { 1878 v = amat->a + amat->i[j]; 1879 sum = 0.0; 1880 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1881 sum += PetscAbsScalar(*v); v++; 1882 } 1883 v = bmat->a + bmat->i[j]; 1884 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1885 sum += PetscAbsScalar(*v); v++; 1886 } 1887 if (sum > ntemp) ntemp = sum; 1888 } 1889 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1890 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1891 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1892 } 1893 PetscFunctionReturn(0); 1894 } 1895 1896 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1897 { 1898 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 1899 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 1900 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 1901 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 1902 PetscErrorCode ierr; 1903 Mat B,A_diag,*B_diag; 1904 const MatScalar *pbv,*bv; 1905 1906 PetscFunctionBegin; 1907 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1908 ai = Aloc->i; aj = Aloc->j; 1909 bi = Bloc->i; bj = Bloc->j; 1910 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1911 PetscInt *d_nnz,*g_nnz,*o_nnz; 1912 PetscSFNode *oloc; 1913 PETSC_UNUSED PetscSF sf; 1914 1915 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1916 /* compute d_nnz for preallocation */ 1917 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 1918 for (i=0; i<ai[ma]; i++) { 1919 d_nnz[aj[i]]++; 1920 } 1921 /* compute local off-diagonal contributions */ 1922 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 1923 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1924 /* map those to global */ 1925 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1926 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1927 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1928 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 1929 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1930 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1931 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1932 1933 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1934 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1935 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1936 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1937 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1938 ierr = 
PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1939 } else { 1940 B = *matout; 1941 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1942 } 1943 1944 b = (Mat_MPIAIJ*)B->data; 1945 A_diag = a->A; 1946 B_diag = &b->A; 1947 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 1948 A_diag_ncol = A_diag->cmap->N; 1949 B_diag_ilen = sub_B_diag->ilen; 1950 B_diag_i = sub_B_diag->i; 1951 1952 /* Set ilen for diagonal of B */ 1953 for (i=0; i<A_diag_ncol; i++) { 1954 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 1955 } 1956 1957 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 1958 very quickly (=without using MatSetValues), because all writes are local. */ 1959 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 1960 1961 /* copy over the B part */ 1962 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 1963 ierr = MatSeqAIJGetArrayRead(a->B,&bv);CHKERRQ(ierr); 1964 pbv = bv; 1965 row = A->rmap->rstart; 1966 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1967 cols_tmp = cols; 1968 for (i=0; i<mb; i++) { 1969 ncol = bi[i+1]-bi[i]; 1970 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES);CHKERRQ(ierr); 1971 row++; 1972 pbv += ncol; cols_tmp += ncol; 1973 } 1974 ierr = PetscFree(cols);CHKERRQ(ierr); 1975 ierr = MatSeqAIJRestoreArrayRead(a->B,&bv);CHKERRQ(ierr); 1976 1977 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1978 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1979 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1980 *matout = B; 1981 } else { 1982 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 1983 } 1984 PetscFunctionReturn(0); 1985 } 1986 1987 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1988 { 1989 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1990 Mat a = aij->A,b = aij->B; 1991 PetscErrorCode ierr; 1992 PetscInt s1,s2,s3; 1993 1994 PetscFunctionBegin; 1995 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1996 if (rr) { 1997 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1998 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1999 /* Overlap communication with computation. 
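       The scatter of the right-scaling vector rr into the ghosted work vector lvec is only started
       here; the diagonal block (and, with ll, the rows of the off-diagonal block) are scaled while
       the messages are in flight, and the matching VecScatterEnd() plus the column scaling of the
       off-diagonal block by lvec follow afterwards:

         VecScatterBegin(rr -> lvec)                  post the communication
         row-scale a->B by ll, scale a->A by ll, rr   local work
         VecScatterEnd(rr -> lvec)                    wait for the ghost values
         column-scale a->B by lvec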
*/ 2000 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2001 } 2002 if (ll) { 2003 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2004 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2005 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 2006 } 2007 /* scale the diagonal block */ 2008 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2009 2010 if (rr) { 2011 /* Do a scatter end and then right scale the off-diagonal block */ 2012 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2013 ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr); 2014 } 2015 PetscFunctionReturn(0); 2016 } 2017 2018 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2019 { 2020 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2021 PetscErrorCode ierr; 2022 2023 PetscFunctionBegin; 2024 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2025 PetscFunctionReturn(0); 2026 } 2027 2028 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2029 { 2030 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2031 Mat a,b,c,d; 2032 PetscBool flg; 2033 PetscErrorCode ierr; 2034 2035 PetscFunctionBegin; 2036 a = matA->A; b = matA->B; 2037 c = matB->A; d = matB->B; 2038 2039 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2040 if (flg) { 2041 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2042 } 2043 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 2044 PetscFunctionReturn(0); 2045 } 2046 2047 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2048 { 2049 PetscErrorCode ierr; 2050 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2051 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2052 2053 PetscFunctionBegin; 2054 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2055 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2056 /* because of the column compression in the off-processor part of the matrix a->B, 2057 the number of columns in a->B and b->B may be different, hence we cannot call 2058 the MatCopy() directly on the two parts. If need be, we can provide a more 2059 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2060 then copying the submatrices */ 2061 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2062 } else { 2063 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2064 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2065 } 2066 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2067 PetscFunctionReturn(0); 2068 } 2069 2070 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2071 { 2072 PetscErrorCode ierr; 2073 2074 PetscFunctionBegin; 2075 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr); 2076 PetscFunctionReturn(0); 2077 } 2078 2079 /* 2080 Computes the number of nonzeros per row needed for preallocation when X and Y 2081 have different nonzero structure. 
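    The routine merges, row by row, the column lists of X and Y after mapping local column indices
    through xltog/yltog to global numbering (the per-row lists are assumed sorted, as they are for
    AIJ), and counts the size of the union.  A small worked example: if row i of X holds global
    columns {0, 3, 7} and row i of Y holds {3, 5}, the merge counts 0, 3 (the duplicate only once),
    5 and 7, giving nnz[i] = 4.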
2082 */ 2083 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2084 { 2085 PetscInt i,j,k,nzx,nzy; 2086 2087 PetscFunctionBegin; 2088 /* Set the number of nonzeros in the new matrix */ 2089 for (i=0; i<m; i++) { 2090 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2091 nzx = xi[i+1] - xi[i]; 2092 nzy = yi[i+1] - yi[i]; 2093 nnz[i] = 0; 2094 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2095 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2096 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2097 nnz[i]++; 2098 } 2099 for (; k<nzy; k++) nnz[i]++; 2100 } 2101 PetscFunctionReturn(0); 2102 } 2103 2104 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2105 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2106 { 2107 PetscErrorCode ierr; 2108 PetscInt m = Y->rmap->N; 2109 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2110 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2111 2112 PetscFunctionBegin; 2113 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2114 PetscFunctionReturn(0); 2115 } 2116 2117 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2118 { 2119 PetscErrorCode ierr; 2120 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2121 2122 PetscFunctionBegin; 2123 if (str == SAME_NONZERO_PATTERN) { 2124 ierr = MatAXPY(yy->A,a,xx->A,str);CHKERRQ(ierr); 2125 ierr = MatAXPY(yy->B,a,xx->B,str);CHKERRQ(ierr); 2126 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2127 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2128 } else { 2129 Mat B; 2130 PetscInt *nnz_d,*nnz_o; 2131 2132 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2133 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2134 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2135 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2136 ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr); 2137 ierr = MatSetType(B,((PetscObject)Y)->type_name);CHKERRQ(ierr); 2138 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2139 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2140 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2141 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2142 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2143 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2144 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2145 } 2146 PetscFunctionReturn(0); 2147 } 2148 2149 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2150 2151 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2152 { 2153 #if defined(PETSC_USE_COMPLEX) 2154 PetscErrorCode ierr; 2155 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2156 2157 PetscFunctionBegin; 2158 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2159 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2160 #else 2161 PetscFunctionBegin; 2162 #endif 2163 PetscFunctionReturn(0); 2164 } 2165 2166 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2167 { 2168 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2169 PetscErrorCode ierr; 2170 2171 PetscFunctionBegin; 2172 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2173 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2174 
PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*idxb = NULL,m = A->rmap->n;
  PetscScalar       *va,*vv;
  Vec               vB,vA;
  const PetscScalar *vb;

  PetscFunctionBegin;
  ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
  ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);

  ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<m; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
  ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);
  ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);

  ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
  ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      vv[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    } else {
      vv[i] = va[i];
      if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
        idx[i] = a->garray[idxb[i]];
    }
  }
  ierr = VecRestoreArrayWrite(v,&vv);CHKERRQ(ierr); /* vv was obtained from v above, so restore it to v, not vA */
  ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vA);CHKERRQ(ierr);
  ierr = VecDestroy(&vB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  PetscErrorCode    ierr;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
    ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr);
    ierr = VecDestroy(&diagV);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (n == 0) {
    if (m) {
      ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
      for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
      ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr);

  /* Get offdiagIdx[] for implicit 0.0 */
  ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
  ba = bav;
  bi = b->i;
  bj = b->j;
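  /* The off-diagonal block B stores only the columns that contain at least one nonzero on this
     process; cmap[] (= mat->garray) maps a compressed local column of B back to its global column
     number.  If a row of B has fewer stored entries than there are off-process columns
     (A->cmap->N - n), it contains implicit zeros, so 0.0 is the starting candidate for the row
     minimum and the loop below records the global column of the first implicit zero (the first
     "hole" in the cmap).  For illustration, assume N = 6 global columns, locally owned columns [2,4)
     and cmap = {0,4,5}: a row of B stored at compressed columns {0,1} (global columns 0 and 4) has
     ncols = 2 < 4 off-process columns, and its first implicit zero is at global column 1.  Only a
     row that stores all off-process columns is treated as dense and seeded with its first stored
     entry. */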
2276 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2277 for (r = 0; r < m; r++) { 2278 ncols = bi[r+1] - bi[r]; 2279 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2280 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2281 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2282 offdiagA[r] = 0.0; 2283 2284 /* Find first hole in the cmap */ 2285 for (j=0; j<ncols; j++) { 2286 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2287 if (col > j && j < cstart) { 2288 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2289 break; 2290 } else if (col > j + n && j >= cstart) { 2291 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2292 break; 2293 } 2294 } 2295 if (j == ncols && ncols < A->cmap->N - n) { 2296 /* a hole is outside compressed Bcols */ 2297 if (ncols == 0) { 2298 if (cstart) { 2299 offdiagIdx[r] = 0; 2300 } else offdiagIdx[r] = cend; 2301 } else { /* ncols > 0 */ 2302 offdiagIdx[r] = cmap[ncols-1] + 1; 2303 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2304 } 2305 } 2306 } 2307 2308 for (j=0; j<ncols; j++) { 2309 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2310 ba++; bj++; 2311 } 2312 } 2313 2314 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2315 ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2316 for (r = 0; r < m; ++r) { 2317 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2318 a[r] = diagA[r]; 2319 if (idx) idx[r] = cstart + diagIdx[r]; 2320 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2321 a[r] = diagA[r]; 2322 if (idx) { 2323 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2324 idx[r] = cstart + diagIdx[r]; 2325 } else idx[r] = offdiagIdx[r]; 2326 } 2327 } else { 2328 a[r] = offdiagA[r]; 2329 if (idx) idx[r] = offdiagIdx[r]; 2330 } 2331 } 2332 ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr); 2333 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2334 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2335 ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2336 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2337 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2338 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2339 PetscFunctionReturn(0); 2340 } 2341 2342 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2343 { 2344 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2345 PetscInt m = A->rmap->n,n = A->cmap->n; 2346 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2347 PetscInt *cmap = mat->garray; 2348 PetscInt *diagIdx, *offdiagIdx; 2349 Vec diagV, offdiagV; 2350 PetscScalar *a, *diagA, *offdiagA; 2351 const PetscScalar *ba,*bav; 2352 PetscInt r,j,col,ncols,*bi,*bj; 2353 PetscErrorCode ierr; 2354 Mat B = mat->B; 2355 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2356 2357 PetscFunctionBegin; 2358 /* When a process holds entire A and other processes have no entry */ 2359 if (A->cmap->N == n) { 2360 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2361 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2362 ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr); 2363 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2364 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2365 PetscFunctionReturn(0); 2366 } else if (n == 0) { 2367 if (m) { 2368 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2369 for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;} 2370 ierr = 
VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2371 } 2372 PetscFunctionReturn(0); 2373 } 2374 2375 ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2376 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2377 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2378 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2379 2380 /* Get offdiagIdx[] for implicit 0.0 */ 2381 ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr); 2382 ba = bav; 2383 bi = b->i; 2384 bj = b->j; 2385 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2386 for (r = 0; r < m; r++) { 2387 ncols = bi[r+1] - bi[r]; 2388 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2389 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2390 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2391 offdiagA[r] = 0.0; 2392 2393 /* Find first hole in the cmap */ 2394 for (j=0; j<ncols; j++) { 2395 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2396 if (col > j && j < cstart) { 2397 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2398 break; 2399 } else if (col > j + n && j >= cstart) { 2400 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2401 break; 2402 } 2403 } 2404 if (j == ncols && ncols < A->cmap->N - n) { 2405 /* a hole is outside compressed Bcols */ 2406 if (ncols == 0) { 2407 if (cstart) { 2408 offdiagIdx[r] = 0; 2409 } else offdiagIdx[r] = cend; 2410 } else { /* ncols > 0 */ 2411 offdiagIdx[r] = cmap[ncols-1] + 1; 2412 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2413 } 2414 } 2415 } 2416 2417 for (j=0; j<ncols; j++) { 2418 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2419 ba++; bj++; 2420 } 2421 } 2422 2423 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2424 ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2425 for (r = 0; r < m; ++r) { 2426 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2427 a[r] = diagA[r]; 2428 if (idx) idx[r] = cstart + diagIdx[r]; 2429 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2430 a[r] = diagA[r]; 2431 if (idx) { 2432 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2433 idx[r] = cstart + diagIdx[r]; 2434 } else idx[r] = offdiagIdx[r]; 2435 } 2436 } else { 2437 a[r] = offdiagA[r]; 2438 if (idx) idx[r] = offdiagIdx[r]; 2439 } 2440 } 2441 ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr); 2442 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2443 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2444 ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2445 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2446 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2447 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2448 PetscFunctionReturn(0); 2449 } 2450 2451 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2452 { 2453 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 2454 PetscInt m = A->rmap->n,n = A->cmap->n; 2455 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2456 PetscInt *cmap = mat->garray; 2457 PetscInt *diagIdx, *offdiagIdx; 2458 Vec diagV, offdiagV; 2459 PetscScalar *a, *diagA, *offdiagA; 2460 const PetscScalar *ba,*bav; 2461 PetscInt r,j,col,ncols,*bi,*bj; 2462 PetscErrorCode ierr; 2463 Mat B = mat->B; 2464 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2465 2466 PetscFunctionBegin; 2467 /* When a process holds entire A and other processes have no entry */ 2468 if (A->cmap->N == n) { 2469 ierr = 
VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2470 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2471 ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr); 2472 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2473 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2474 PetscFunctionReturn(0); 2475 } else if (n == 0) { 2476 if (m) { 2477 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2478 for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;} 2479 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2480 } 2481 PetscFunctionReturn(0); 2482 } 2483 2484 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2485 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2486 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2487 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2488 2489 /* Get offdiagIdx[] for implicit 0.0 */ 2490 ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr); 2491 ba = bav; 2492 bi = b->i; 2493 bj = b->j; 2494 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2495 for (r = 0; r < m; r++) { 2496 ncols = bi[r+1] - bi[r]; 2497 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2498 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2499 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2500 offdiagA[r] = 0.0; 2501 2502 /* Find first hole in the cmap */ 2503 for (j=0; j<ncols; j++) { 2504 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2505 if (col > j && j < cstart) { 2506 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2507 break; 2508 } else if (col > j + n && j >= cstart) { 2509 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2510 break; 2511 } 2512 } 2513 if (j == ncols && ncols < A->cmap->N - n) { 2514 /* a hole is outside compressed Bcols */ 2515 if (ncols == 0) { 2516 if (cstart) { 2517 offdiagIdx[r] = 0; 2518 } else offdiagIdx[r] = cend; 2519 } else { /* ncols > 0 */ 2520 offdiagIdx[r] = cmap[ncols-1] + 1; 2521 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2522 } 2523 } 2524 } 2525 2526 for (j=0; j<ncols; j++) { 2527 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2528 ba++; bj++; 2529 } 2530 } 2531 2532 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2533 ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr); 2534 for (r = 0; r < m; ++r) { 2535 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2536 a[r] = diagA[r]; 2537 if (idx) idx[r] = cstart + diagIdx[r]; 2538 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2539 a[r] = diagA[r]; 2540 if (idx) { 2541 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2542 idx[r] = cstart + diagIdx[r]; 2543 } else idx[r] = offdiagIdx[r]; 2544 } 2545 } else { 2546 a[r] = offdiagA[r]; 2547 if (idx) idx[r] = offdiagIdx[r]; 2548 } 2549 } 2550 ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr); 2551 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2552 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2553 ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr); 2554 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2555 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2556 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2557 PetscFunctionReturn(0); 2558 } 2559 2560 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2561 { 2562 PetscErrorCode ierr; 2563 Mat *dummy; 2564 2565 PetscFunctionBegin; 2566 ierr = 
MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2567 *newmat = *dummy; 2568 ierr = PetscFree(dummy);CHKERRQ(ierr); 2569 PetscFunctionReturn(0); 2570 } 2571 2572 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2573 { 2574 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2575 PetscErrorCode ierr; 2576 2577 PetscFunctionBegin; 2578 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2579 A->factorerrortype = a->A->factorerrortype; 2580 PetscFunctionReturn(0); 2581 } 2582 2583 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2584 { 2585 PetscErrorCode ierr; 2586 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2587 2588 PetscFunctionBegin; 2589 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2590 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2591 if (x->assembled) { 2592 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2593 } else { 2594 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2595 } 2596 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2597 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2598 PetscFunctionReturn(0); 2599 } 2600 2601 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2602 { 2603 PetscFunctionBegin; 2604 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2605 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2606 PetscFunctionReturn(0); 2607 } 2608 2609 /*@ 2610 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2611 2612 Collective on Mat 2613 2614 Input Parameters: 2615 + A - the matrix 2616 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2617 2618 Level: advanced 2619 2620 @*/ 2621 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2622 { 2623 PetscErrorCode ierr; 2624 2625 PetscFunctionBegin; 2626 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2627 PetscFunctionReturn(0); 2628 } 2629 2630 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2631 { 2632 PetscErrorCode ierr; 2633 PetscBool sc = PETSC_FALSE,flg; 2634 2635 PetscFunctionBegin; 2636 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2637 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2638 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2639 if (flg) { 2640 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2641 } 2642 ierr = PetscOptionsTail();CHKERRQ(ierr); 2643 PetscFunctionReturn(0); 2644 } 2645 2646 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2647 { 2648 PetscErrorCode ierr; 2649 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2650 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2651 2652 PetscFunctionBegin; 2653 if (!Y->preallocated) { 2654 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2655 } else if (!aij->nz) { 2656 PetscInt nonew = aij->nonew; 2657 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2658 aij->nonew = nonew; 2659 } 2660 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2661 
PetscFunctionReturn(0); 2662 } 2663 2664 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2665 { 2666 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2667 PetscErrorCode ierr; 2668 2669 PetscFunctionBegin; 2670 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2671 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2672 if (d) { 2673 PetscInt rstart; 2674 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2675 *d += rstart; 2676 2677 } 2678 PetscFunctionReturn(0); 2679 } 2680 2681 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2682 { 2683 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2684 PetscErrorCode ierr; 2685 2686 PetscFunctionBegin; 2687 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2688 PetscFunctionReturn(0); 2689 } 2690 2691 /* -------------------------------------------------------------------*/ 2692 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2693 MatGetRow_MPIAIJ, 2694 MatRestoreRow_MPIAIJ, 2695 MatMult_MPIAIJ, 2696 /* 4*/ MatMultAdd_MPIAIJ, 2697 MatMultTranspose_MPIAIJ, 2698 MatMultTransposeAdd_MPIAIJ, 2699 NULL, 2700 NULL, 2701 NULL, 2702 /*10*/ NULL, 2703 NULL, 2704 NULL, 2705 MatSOR_MPIAIJ, 2706 MatTranspose_MPIAIJ, 2707 /*15*/ MatGetInfo_MPIAIJ, 2708 MatEqual_MPIAIJ, 2709 MatGetDiagonal_MPIAIJ, 2710 MatDiagonalScale_MPIAIJ, 2711 MatNorm_MPIAIJ, 2712 /*20*/ MatAssemblyBegin_MPIAIJ, 2713 MatAssemblyEnd_MPIAIJ, 2714 MatSetOption_MPIAIJ, 2715 MatZeroEntries_MPIAIJ, 2716 /*24*/ MatZeroRows_MPIAIJ, 2717 NULL, 2718 NULL, 2719 NULL, 2720 NULL, 2721 /*29*/ MatSetUp_MPIAIJ, 2722 NULL, 2723 NULL, 2724 MatGetDiagonalBlock_MPIAIJ, 2725 NULL, 2726 /*34*/ MatDuplicate_MPIAIJ, 2727 NULL, 2728 NULL, 2729 NULL, 2730 NULL, 2731 /*39*/ MatAXPY_MPIAIJ, 2732 MatCreateSubMatrices_MPIAIJ, 2733 MatIncreaseOverlap_MPIAIJ, 2734 MatGetValues_MPIAIJ, 2735 MatCopy_MPIAIJ, 2736 /*44*/ MatGetRowMax_MPIAIJ, 2737 MatScale_MPIAIJ, 2738 MatShift_MPIAIJ, 2739 MatDiagonalSet_MPIAIJ, 2740 MatZeroRowsColumns_MPIAIJ, 2741 /*49*/ MatSetRandom_MPIAIJ, 2742 NULL, 2743 NULL, 2744 NULL, 2745 NULL, 2746 /*54*/ MatFDColoringCreate_MPIXAIJ, 2747 NULL, 2748 MatSetUnfactored_MPIAIJ, 2749 MatPermute_MPIAIJ, 2750 NULL, 2751 /*59*/ MatCreateSubMatrix_MPIAIJ, 2752 MatDestroy_MPIAIJ, 2753 MatView_MPIAIJ, 2754 NULL, 2755 NULL, 2756 /*64*/ NULL, 2757 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2758 NULL, 2759 NULL, 2760 NULL, 2761 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2762 MatGetRowMinAbs_MPIAIJ, 2763 NULL, 2764 NULL, 2765 NULL, 2766 NULL, 2767 /*75*/ MatFDColoringApply_AIJ, 2768 MatSetFromOptions_MPIAIJ, 2769 NULL, 2770 NULL, 2771 MatFindZeroDiagonals_MPIAIJ, 2772 /*80*/ NULL, 2773 NULL, 2774 NULL, 2775 /*83*/ MatLoad_MPIAIJ, 2776 MatIsSymmetric_MPIAIJ, 2777 NULL, 2778 NULL, 2779 NULL, 2780 NULL, 2781 /*89*/ NULL, 2782 NULL, 2783 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2784 NULL, 2785 NULL, 2786 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2787 NULL, 2788 NULL, 2789 NULL, 2790 MatBindToCPU_MPIAIJ, 2791 /*99*/ MatProductSetFromOptions_MPIAIJ, 2792 NULL, 2793 NULL, 2794 MatConjugate_MPIAIJ, 2795 NULL, 2796 /*104*/MatSetValuesRow_MPIAIJ, 2797 MatRealPart_MPIAIJ, 2798 MatImaginaryPart_MPIAIJ, 2799 NULL, 2800 NULL, 2801 /*109*/NULL, 2802 NULL, 2803 MatGetRowMin_MPIAIJ, 2804 NULL, 2805 MatMissingDiagonal_MPIAIJ, 2806 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2807 NULL, 2808 MatGetGhosts_MPIAIJ, 2809 NULL, 2810 NULL, 2811 /*119*/MatMultDiagonalBlock_MPIAIJ, 2812 
NULL, 2813 NULL, 2814 NULL, 2815 MatGetMultiProcBlock_MPIAIJ, 2816 /*124*/MatFindNonzeroRows_MPIAIJ, 2817 MatGetColumnReductions_MPIAIJ, 2818 MatInvertBlockDiagonal_MPIAIJ, 2819 MatInvertVariableBlockDiagonal_MPIAIJ, 2820 MatCreateSubMatricesMPI_MPIAIJ, 2821 /*129*/NULL, 2822 NULL, 2823 NULL, 2824 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2825 NULL, 2826 /*134*/NULL, 2827 NULL, 2828 NULL, 2829 NULL, 2830 NULL, 2831 /*139*/MatSetBlockSizes_MPIAIJ, 2832 NULL, 2833 NULL, 2834 MatFDColoringSetUp_MPIXAIJ, 2835 MatFindOffBlockDiagonalEntries_MPIAIJ, 2836 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2837 /*145*/NULL, 2838 NULL, 2839 NULL 2840 }; 2841 2842 /* ----------------------------------------------------------------------------------------*/ 2843 2844 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2845 { 2846 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2847 PetscErrorCode ierr; 2848 2849 PetscFunctionBegin; 2850 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2851 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2852 PetscFunctionReturn(0); 2853 } 2854 2855 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2856 { 2857 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2858 PetscErrorCode ierr; 2859 2860 PetscFunctionBegin; 2861 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2862 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2863 PetscFunctionReturn(0); 2864 } 2865 2866 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2867 { 2868 Mat_MPIAIJ *b; 2869 PetscErrorCode ierr; 2870 PetscMPIInt size; 2871 2872 PetscFunctionBegin; 2873 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2874 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2875 b = (Mat_MPIAIJ*)B->data; 2876 2877 #if defined(PETSC_USE_CTABLE) 2878 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2879 #else 2880 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2881 #endif 2882 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2883 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2884 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2885 2886 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2887 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 2888 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2889 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2890 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 2891 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2892 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2893 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2894 2895 if (!B->preallocated) { 2896 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2897 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2898 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2899 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2900 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2901 } 2902 2903 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2904 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2905 B->preallocated = PETSC_TRUE; 2906 B->was_assembled = PETSC_FALSE; 2907 B->assembled = PETSC_FALSE; 2908 PetscFunctionReturn(0); 2909 } 2910 2911 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2912 { 2913 Mat_MPIAIJ *b; 2914 PetscErrorCode ierr; 2915 2916 PetscFunctionBegin; 2917 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2918 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2919 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2920 b = (Mat_MPIAIJ*)B->data; 2921 2922 #if defined(PETSC_USE_CTABLE) 2923 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2924 #else 2925 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2926 #endif 2927 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2928 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2929 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2930 2931 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2932 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2933 B->preallocated = PETSC_TRUE; 2934 B->was_assembled = PETSC_FALSE; 2935 B->assembled = PETSC_FALSE; 2936 PetscFunctionReturn(0); 2937 } 2938 2939 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2940 { 2941 Mat mat; 2942 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2943 PetscErrorCode ierr; 2944 2945 PetscFunctionBegin; 2946 *newmat = NULL; 2947 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2948 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2949 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2950 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2951 a = (Mat_MPIAIJ*)mat->data; 2952 2953 mat->factortype = matin->factortype; 2954 mat->assembled = matin->assembled; 2955 mat->insertmode = NOT_SET_VALUES; 2956 mat->preallocated = matin->preallocated; 2957 2958 a->size = oldmat->size; 2959 a->rank = oldmat->rank; 2960 a->donotstash = oldmat->donotstash; 2961 a->roworiented = oldmat->roworiented; 2962 a->rowindices = NULL; 2963 a->rowvalues = NULL; 2964 a->getrowactive = PETSC_FALSE; 2965 2966 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2967 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2968 2969 if (oldmat->colmap) { 2970 #if defined(PETSC_USE_CTABLE) 2971 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2972 #else 2973 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2974 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2975 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 2976 #endif 2977 } else a->colmap = NULL; 2978 if (oldmat->garray) { 2979 PetscInt len; 2980 len = oldmat->B->cmap->n; 2981 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2982 
ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2983 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 2984 } else a->garray = NULL; 2985 2986 /* It may happen MatDuplicate is called with a non-assembled matrix 2987 In fact, MatDuplicate only requires the matrix to be preallocated 2988 This may happen inside a DMCreateMatrix_Shell */ 2989 if (oldmat->lvec) { 2990 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2991 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2992 } 2993 if (oldmat->Mvctx) { 2994 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2995 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2996 } 2997 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2998 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2999 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3000 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3001 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3002 *newmat = mat; 3003 PetscFunctionReturn(0); 3004 } 3005 3006 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3007 { 3008 PetscBool isbinary, ishdf5; 3009 PetscErrorCode ierr; 3010 3011 PetscFunctionBegin; 3012 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 3013 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 3014 /* force binary viewer to load .info file if it has not yet done so */ 3015 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3016 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 3017 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 3018 if (isbinary) { 3019 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 3020 } else if (ishdf5) { 3021 #if defined(PETSC_HAVE_HDF5) 3022 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 3023 #else 3024 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3025 #endif 3026 } else { 3027 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 3028 } 3029 PetscFunctionReturn(0); 3030 } 3031 3032 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3033 { 3034 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 3035 PetscInt *rowidxs,*colidxs; 3036 PetscScalar *matvals; 3037 PetscErrorCode ierr; 3038 3039 PetscFunctionBegin; 3040 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3041 3042 /* read in matrix header */ 3043 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3044 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3045 M = header[1]; N = header[2]; nz = header[3]; 3046 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 3047 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N); 3048 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 3049 3050 /* set block sizes from the viewer's .info file */ 3051 ierr = 
MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3052 /* set global sizes if not set already */ 3053 if (mat->rmap->N < 0) mat->rmap->N = M; 3054 if (mat->cmap->N < 0) mat->cmap->N = N; 3055 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3056 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3057 3058 /* check if the matrix sizes are correct */ 3059 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3060 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 3061 3062 /* read in row lengths and build row indices */ 3063 ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr); 3064 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3065 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 3066 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3067 ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr); 3068 if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 3069 /* read in column indices and matrix values */ 3070 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 3071 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 3072 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 3073 /* store matrix indices and values */ 3074 ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr); 3075 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3076 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3077 PetscFunctionReturn(0); 3078 } 3079 3080 /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 3081 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3082 { 3083 PetscErrorCode ierr; 3084 IS iscol_local; 3085 PetscBool isstride; 3086 PetscMPIInt lisstride=0,gisstride; 3087 3088 PetscFunctionBegin; 3089 /* check if we are grabbing all columns*/ 3090 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3091 3092 if (isstride) { 3093 PetscInt start,len,mstart,mlen; 3094 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3095 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3096 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3097 if (mstart == start && mlen-mstart == len) lisstride = 1; 3098 } 3099 3100 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 3101 if (gisstride) { 3102 PetscInt N; 3103 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3104 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr); 3105 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3106 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3107 } else { 3108 PetscInt cbs; 3109 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3110 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3111 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3112 } 3113 3114 *isseq = iscol_local; 3115 PetscFunctionReturn(0); 3116 } 3117 3118 /* 3119 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3120 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3121 3122 Input Parameters: 3123 mat - matrix 3124 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3125 i.e., mat->rstart <= isrow[i] < mat->rend 3126 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3127 i.e., mat->cstart <= iscol[i] < mat->cend 3128 Output Parameter: 3129 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3130 iscol_o - sequential column index set for retrieving mat->B 3131 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3132 */ 3133 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3134 { 3135 PetscErrorCode ierr; 3136 Vec x,cmap; 3137 const PetscInt *is_idx; 3138 PetscScalar *xarray,*cmaparray; 3139 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3140 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3141 Mat B=a->B; 3142 Vec lvec=a->lvec,lcmap; 3143 PetscInt i,cstart,cend,Bn=B->cmap->N; 3144 MPI_Comm comm; 3145 VecScatter Mvctx=a->Mvctx; 3146 3147 PetscFunctionBegin; 3148 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3149 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3150 3151 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3152 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3153 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3154 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3155 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3156 3157 /* Get start indices */ 3158 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3159 isstart -= ncols; 3160 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3161 3162 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3163 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3164 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3165 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3166 for (i=0; i<ncols; i++) { 3167 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3168 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3169 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3170 } 3171 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3172 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3173 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3174 3175 /* Get iscol_d */ 3176 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3177 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3178 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3179 3180 /* Get isrow_d */ 3181 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3182 rstart = mat->rmap->rstart; 3183 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3184 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3185 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3186 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3187 3188 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3189 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3190 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3191 3192 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3193 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3194 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3195 3196 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3197 3198 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3199 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3200 3201 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3202 /* off-process column indices */ 3203 count = 0; 3204 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3205 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3206 3207 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3208 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3209 for (i=0; i<Bn; i++) { 3210 if (PetscRealPart(xarray[i]) > -1.0) { 3211 idx[count] = i; /* local column index in off-diagonal part B */ 3212 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3213 count++; 3214 } 3215 } 3216 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3217 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3218 3219 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3220 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3221 3222 ierr = PetscFree(idx);CHKERRQ(ierr); 3223 *garray = cmap1; 3224 3225 ierr = VecDestroy(&x);CHKERRQ(ierr); 3226 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3227 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3228 PetscFunctionReturn(0); 3229 } 3230 3231 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3232 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3233 { 3234 PetscErrorCode ierr; 3235 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3236 Mat M = NULL; 3237 MPI_Comm comm; 3238 IS iscol_d,isrow_d,iscol_o; 3239 Mat Asub = NULL,Bsub = NULL; 3240 PetscInt n; 3241 3242 PetscFunctionBegin; 3243 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3244 3245 if (call == MAT_REUSE_MATRIX) { 3246 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3247 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3248 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3249 3250 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3251 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3252 3253 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3254 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3255 3256 /* Update diagonal and off-diagonal portions of submat */ 3257 asub = (Mat_MPIAIJ*)(*submat)->data; 3258 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3259 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3260 if (n) { 3261 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3262 } 3263 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3264 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3265 3266 } else { /* call == MAT_INITIAL_MATRIX) */ 3267 const PetscInt *garray; 3268 PetscInt BsubN; 3269 3270 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3271 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3272 3273 /* Create local submatrices Asub and Bsub */ 3274 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3275 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3276 3277 /* Create submatrix M */ 3278 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3279 3280 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3281 asub = (Mat_MPIAIJ*)M->data; 3282 3283 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3284 n = asub->B->cmap->N; 3285 if (BsubN > n) { 3286 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3287 const PetscInt *idx; 3288 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3289 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3290 3291 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3292 j = 0; 3293 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3294 for (i=0; i<n; i++) { 3295 if (j >= BsubN) break; 3296 while (subgarray[i] > garray[j]) j++; 3297 3298 if (subgarray[i] == garray[j]) { 3299 idx_new[i] = idx[j++]; 3300 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3301 } 3302 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3303 3304 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3305 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3306 3307 } else if (BsubN < n) { 3308 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3309 } 3310 3311 ierr = PetscFree(garray);CHKERRQ(ierr); 3312 *submat = M; 3313 3314 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3315 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3316 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3317 3318 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3319 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3320 3321 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3322 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3323 } 3324 PetscFunctionReturn(0); 3325 } 3326 3327 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3328 { 3329 PetscErrorCode ierr; 3330 IS iscol_local=NULL,isrow_d; 3331 PetscInt csize; 3332 PetscInt n,i,j,start,end; 3333 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3334 MPI_Comm comm; 3335 3336 PetscFunctionBegin; 3337 /* If isrow has same processor distribution as mat, 3338 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3339 if (call == MAT_REUSE_MATRIX) { 3340 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3341 if (isrow_d) { 3342 sameRowDist = PETSC_TRUE; 3343 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3344 } else { 3345 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3346 if (iscol_local) { 3347 sameRowDist = PETSC_TRUE; 3348 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3349 } 3350 } 3351 } else { 3352 /* Check if isrow has same processor distribution as mat */ 3353 sameDist[0] = 
PETSC_FALSE; 3354 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3355 if (!n) { 3356 sameDist[0] = PETSC_TRUE; 3357 } else { 3358 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3359 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3360 if (i >= start && j < end) { 3361 sameDist[0] = PETSC_TRUE; 3362 } 3363 } 3364 3365 /* Check if iscol has same processor distribution as mat */ 3366 sameDist[1] = PETSC_FALSE; 3367 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3368 if (!n) { 3369 sameDist[1] = PETSC_TRUE; 3370 } else { 3371 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3372 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3373 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3374 } 3375 3376 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3377 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRMPI(ierr); 3378 sameRowDist = tsameDist[0]; 3379 } 3380 3381 if (sameRowDist) { 3382 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3383 /* isrow and iscol have same processor distribution as mat */ 3384 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3385 PetscFunctionReturn(0); 3386 } else { /* sameRowDist */ 3387 /* isrow has same processor distribution as mat */ 3388 if (call == MAT_INITIAL_MATRIX) { 3389 PetscBool sorted; 3390 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3391 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3392 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3393 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3394 3395 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3396 if (sorted) { 3397 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3398 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3399 PetscFunctionReturn(0); 3400 } 3401 } else { /* call == MAT_REUSE_MATRIX */ 3402 IS iscol_sub; 3403 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3404 if (iscol_sub) { 3405 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3406 PetscFunctionReturn(0); 3407 } 3408 } 3409 } 3410 } 3411 3412 /* General case: iscol -> iscol_local which has global size of iscol */ 3413 if (call == MAT_REUSE_MATRIX) { 3414 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3415 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3416 } else { 3417 if (!iscol_local) { 3418 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3419 } 3420 } 3421 3422 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3423 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3424 3425 if (call == MAT_INITIAL_MATRIX) { 3426 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3427 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3428 } 3429 PetscFunctionReturn(0); 3430 } 3431 3432 /*@C 3433 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3434 and "off-diagonal" part of the matrix in CSR format. 3435 3436 Collective 3437 3438 Input Parameters: 3439 + comm - MPI communicator 3440 . 
A - "diagonal" portion of matrix 3441 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3442 - garray - global index of B columns 3443 3444 Output Parameter: 3445 . mat - the matrix, with input A as its local diagonal matrix 3446 Level: advanced 3447 3448 Notes: 3449 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3450 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3451 3452 .seealso: MatCreateMPIAIJWithSplitArrays() 3453 @*/ 3454 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3455 { 3456 PetscErrorCode ierr; 3457 Mat_MPIAIJ *maij; 3458 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3459 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3460 const PetscScalar *oa; 3461 Mat Bnew; 3462 PetscInt m,n,N; 3463 3464 PetscFunctionBegin; 3465 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3466 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3467 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3468 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3469 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3470 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3471 3472 /* Get global columns of mat */ 3473 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3474 3475 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3476 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3477 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3478 maij = (Mat_MPIAIJ*)(*mat)->data; 3479 3480 (*mat)->preallocated = PETSC_TRUE; 3481 3482 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3483 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3484 3485 /* Set A as diagonal portion of *mat */ 3486 maij->A = A; 3487 3488 nz = oi[m]; 3489 for (i=0; i<nz; i++) { 3490 col = oj[i]; 3491 oj[i] = garray[col]; 3492 } 3493 3494 /* Set Bnew as off-diagonal portion of *mat */ 3495 ierr = MatSeqAIJGetArrayRead(B,&oa);CHKERRQ(ierr); 3496 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew);CHKERRQ(ierr); 3497 ierr = MatSeqAIJRestoreArrayRead(B,&oa);CHKERRQ(ierr); 3498 bnew = (Mat_SeqAIJ*)Bnew->data; 3499 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3500 maij->B = Bnew; 3501 3502 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3503 3504 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3505 b->free_a = PETSC_FALSE; 3506 b->free_ij = PETSC_FALSE; 3507 ierr = MatDestroy(&B);CHKERRQ(ierr); 3508 3509 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3510 bnew->free_a = PETSC_TRUE; 3511 bnew->free_ij = PETSC_TRUE; 3512 3513 /* condense columns of maij->B */ 3514 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3515 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3516 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3517 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3518 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3519 PetscFunctionReturn(0); 
3520 } 3521 3522 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3523 3524 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3525 { 3526 PetscErrorCode ierr; 3527 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3528 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3529 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3530 Mat M,Msub,B=a->B; 3531 MatScalar *aa; 3532 Mat_SeqAIJ *aij; 3533 PetscInt *garray = a->garray,*colsub,Ncols; 3534 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3535 IS iscol_sub,iscmap; 3536 const PetscInt *is_idx,*cmap; 3537 PetscBool allcolumns=PETSC_FALSE; 3538 MPI_Comm comm; 3539 3540 PetscFunctionBegin; 3541 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3542 if (call == MAT_REUSE_MATRIX) { 3543 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3544 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3545 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3546 3547 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3548 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3549 3550 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3551 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3552 3553 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3554 3555 } else { /* call == MAT_INITIAL_MATRIX) */ 3556 PetscBool flg; 3557 3558 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3559 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3560 3561 /* (1) iscol -> nonscalable iscol_local */ 3562 /* Check for special case: each processor gets entire matrix columns */ 3563 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3564 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3565 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 3566 if (allcolumns) { 3567 iscol_sub = iscol_local; 3568 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3569 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3570 3571 } else { 3572 /* (2) iscol_local -> iscol_sub and iscmap. 
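          iscol_sub keeps only those entries of iscol_local that this process actually stores
          (columns of the diagonal block plus columns listed in garray), while iscmap records the
          position of each kept entry within iscol_local, i.e. its column index in the submatrix.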
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3573 PetscInt *idx,*cmap1,k; 3574 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3575 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3576 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3577 count = 0; 3578 k = 0; 3579 for (i=0; i<Ncols; i++) { 3580 j = is_idx[i]; 3581 if (j >= cstart && j < cend) { 3582 /* diagonal part of mat */ 3583 idx[count] = j; 3584 cmap1[count++] = i; /* column index in submat */ 3585 } else if (Bn) { 3586 /* off-diagonal part of mat */ 3587 if (j == garray[k]) { 3588 idx[count] = j; 3589 cmap1[count++] = i; /* column index in submat */ 3590 } else if (j > garray[k]) { 3591 while (j > garray[k] && k < Bn-1) k++; 3592 if (j == garray[k]) { 3593 idx[count] = j; 3594 cmap1[count++] = i; /* column index in submat */ 3595 } 3596 } 3597 } 3598 } 3599 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3600 3601 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3602 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3603 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3604 3605 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3606 } 3607 3608 /* (3) Create sequential Msub */ 3609 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3610 } 3611 3612 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3613 aij = (Mat_SeqAIJ*)(Msub)->data; 3614 ii = aij->i; 3615 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3616 3617 /* 3618 m - number of local rows 3619 Ncols - number of columns (same on all processors) 3620 rstart - first row in new global matrix generated 3621 */ 3622 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3623 3624 if (call == MAT_INITIAL_MATRIX) { 3625 /* (4) Create parallel newmat */ 3626 PetscMPIInt rank,size; 3627 PetscInt csize; 3628 3629 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 3630 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 3631 3632 /* 3633 Determine the number of non-zeros in the diagonal and off-diagonal 3634 portions of the matrix in order to do correct preallocation 3635 */ 3636 3637 /* first get start and end of "diagonal" columns */ 3638 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3639 if (csize == PETSC_DECIDE) { 3640 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3641 if (mglobal == Ncols) { /* square matrix */ 3642 nlocal = m; 3643 } else { 3644 nlocal = Ncols/size + ((Ncols % size) > rank); 3645 } 3646 } else { 3647 nlocal = csize; 3648 } 3649 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3650 rstart = rend - nlocal; 3651 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3652 3653 /* next, compute all the lengths */ 3654 jj = aij->j; 3655 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3656 olens = dlens + m; 3657 for (i=0; i<m; i++) { 3658 jend = ii[i+1] - ii[i]; 3659 olen = 0; 3660 dlen = 0; 3661 for (j=0; j<jend; j++) { 3662 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3663 else dlen++; 3664 jj++; 3665 } 3666 olens[i] = olen; 3667 dlens[i] = dlen; 3668 } 3669 3670 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3671 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3672 3673 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3674 ierr = 
MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 3675 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3676 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3677 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3678 ierr = PetscFree(dlens);CHKERRQ(ierr); 3679 3680 } else { /* call == MAT_REUSE_MATRIX */ 3681 M = *newmat; 3682 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3683 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3684 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3685 /* 3686 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3687 rather than the slower MatSetValues(). 3688 */ 3689 M->was_assembled = PETSC_TRUE; 3690 M->assembled = PETSC_FALSE; 3691 } 3692 3693 /* (5) Set values of Msub to *newmat */ 3694 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3695 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3696 3697 jj = aij->j; 3698 ierr = MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr); 3699 for (i=0; i<m; i++) { 3700 row = rstart + i; 3701 nz = ii[i+1] - ii[i]; 3702 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3703 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3704 jj += nz; aa += nz; 3705 } 3706 ierr = MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr); 3707 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3708 3709 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3710 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3711 3712 ierr = PetscFree(colsub);CHKERRQ(ierr); 3713 3714 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3715 if (call == MAT_INITIAL_MATRIX) { 3716 *newmat = M; 3717 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3718 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3719 3720 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3721 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3722 3723 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3724 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3725 3726 if (iscol_local) { 3727 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3728 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3729 } 3730 } 3731 PetscFunctionReturn(0); 3732 } 3733 3734 /* 3735 Not great since it makes two copies of the submatrix, first an SeqAIJ 3736 in local and then by concatenating the local matrices the end result. 3737 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3738 3739 Note: This requires a sequential iscol with all indices. 
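   A minimal calling sketch (illustrative only; iscol_seq and sub are hypothetical names):
       IS  iscol_seq;
       Mat sub;
       ISAllGather(iscol,&iscol_seq);
       MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);
   MatCreateSubMatrix_MPIAIJ() builds such a sequential IS through ISGetSeqIS_Private() before calling here.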
3740 */ 3741 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3742 { 3743 PetscErrorCode ierr; 3744 PetscMPIInt rank,size; 3745 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3746 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3747 Mat M,Mreuse; 3748 MatScalar *aa,*vwork; 3749 MPI_Comm comm; 3750 Mat_SeqAIJ *aij; 3751 PetscBool colflag,allcolumns=PETSC_FALSE; 3752 3753 PetscFunctionBegin; 3754 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3755 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 3756 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 3757 3758 /* Check for special case: each processor gets entire matrix columns */ 3759 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3760 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3761 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3762 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 3763 3764 if (call == MAT_REUSE_MATRIX) { 3765 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3766 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3767 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3768 } else { 3769 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3770 } 3771 3772 /* 3773 m - number of local rows 3774 n - number of columns (same on all processors) 3775 rstart - first row in new global matrix generated 3776 */ 3777 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3778 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3779 if (call == MAT_INITIAL_MATRIX) { 3780 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3781 ii = aij->i; 3782 jj = aij->j; 3783 3784 /* 3785 Determine the number of non-zeros in the diagonal and off-diagonal 3786 portions of the matrix in order to do correct preallocation 3787 */ 3788 3789 /* first get start and end of "diagonal" columns */ 3790 if (csize == PETSC_DECIDE) { 3791 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3792 if (mglobal == n) { /* square matrix */ 3793 nlocal = m; 3794 } else { 3795 nlocal = n/size + ((n % size) > rank); 3796 } 3797 } else { 3798 nlocal = csize; 3799 } 3800 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3801 rstart = rend - nlocal; 3802 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3803 3804 /* next, compute all the lengths */ 3805 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3806 olens = dlens + m; 3807 for (i=0; i<m; i++) { 3808 jend = ii[i+1] - ii[i]; 3809 olen = 0; 3810 dlen = 0; 3811 for (j=0; j<jend; j++) { 3812 if (*jj < rstart || *jj >= rend) olen++; 3813 else dlen++; 3814 jj++; 3815 } 3816 olens[i] = olen; 3817 dlens[i] = dlen; 3818 } 3819 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3820 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3821 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3822 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3823 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3824 ierr = PetscFree(dlens);CHKERRQ(ierr); 3825 } else { 3826 PetscInt ml,nl; 3827 3828 M = *newmat; 3829 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3830 
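    /* on reuse the caller must pass back a matrix with the same local row layout as the one created initially */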
if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3831 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3832 /* 3833 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3834 rather than the slower MatSetValues(). 3835 */ 3836 M->was_assembled = PETSC_TRUE; 3837 M->assembled = PETSC_FALSE; 3838 } 3839 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3840 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3841 ii = aij->i; 3842 jj = aij->j; 3843 3844 /* trigger copy to CPU if needed */ 3845 ierr = MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr); 3846 for (i=0; i<m; i++) { 3847 row = rstart + i; 3848 nz = ii[i+1] - ii[i]; 3849 cwork = jj; jj += nz; 3850 vwork = aa; aa += nz; 3851 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3852 } 3853 ierr = MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr); 3854 3855 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3856 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3857 *newmat = M; 3858 3859 /* save submatrix used in processor for next request */ 3860 if (call == MAT_INITIAL_MATRIX) { 3861 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3862 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3863 } 3864 PetscFunctionReturn(0); 3865 } 3866 3867 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3868 { 3869 PetscInt m,cstart, cend,j,nnz,i,d; 3870 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3871 const PetscInt *JJ; 3872 PetscErrorCode ierr; 3873 PetscBool nooffprocentries; 3874 3875 PetscFunctionBegin; 3876 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3877 3878 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3879 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3880 m = B->rmap->n; 3881 cstart = B->cmap->rstart; 3882 cend = B->cmap->rend; 3883 rstart = B->rmap->rstart; 3884 3885 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3886 3887 if (PetscDefined(USE_DEBUG)) { 3888 for (i=0; i<m; i++) { 3889 nnz = Ii[i+1]- Ii[i]; 3890 JJ = J + Ii[i]; 3891 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3892 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3893 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3894 } 3895 } 3896 3897 for (i=0; i<m; i++) { 3898 nnz = Ii[i+1]- Ii[i]; 3899 JJ = J + Ii[i]; 3900 nnz_max = PetscMax(nnz_max,nnz); 3901 d = 0; 3902 for (j=0; j<nnz; j++) { 3903 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3904 } 3905 d_nnz[i] = d; 3906 o_nnz[i] = nnz - d; 3907 } 3908 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3909 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3910 3911 for (i=0; i<m; i++) { 3912 ii = i + rstart; 3913 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? 
v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 3914 } 3915 nooffprocentries = B->nooffprocentries; 3916 B->nooffprocentries = PETSC_TRUE; 3917 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3918 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3919 B->nooffprocentries = nooffprocentries; 3920 3921 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3922 PetscFunctionReturn(0); 3923 } 3924 3925 /*@ 3926 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3927 (the default parallel PETSc format). 3928 3929 Collective 3930 3931 Input Parameters: 3932 + B - the matrix 3933 . i - the indices into j for the start of each local row (starts with zero) 3934 . j - the column indices for each local row (starts with zero) 3935 - v - optional values in the matrix 3936 3937 Level: developer 3938 3939 Notes: 3940 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3941 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3942 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3943 3944 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3945 3946 The format which is used for the sparse matrix input, is equivalent to a 3947 row-major ordering.. i.e for the following matrix, the input data expected is 3948 as shown 3949 3950 $ 1 0 0 3951 $ 2 0 3 P0 3952 $ ------- 3953 $ 4 5 6 P1 3954 $ 3955 $ Process0 [P0]: rows_owned=[0,1] 3956 $ i = {0,1,3} [size = nrow+1 = 2+1] 3957 $ j = {0,0,2} [size = 3] 3958 $ v = {1,2,3} [size = 3] 3959 $ 3960 $ Process1 [P1]: rows_owned=[2] 3961 $ i = {0,3} [size = nrow+1 = 1+1] 3962 $ j = {0,1,2} [size = 3] 3963 $ v = {4,5,6} [size = 3] 3964 3965 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3966 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3967 @*/ 3968 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3969 { 3970 PetscErrorCode ierr; 3971 3972 PetscFunctionBegin; 3973 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3974 PetscFunctionReturn(0); 3975 } 3976 3977 /*@C 3978 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3979 (the default parallel PETSc format). For good matrix assembly performance 3980 the user should preallocate the matrix storage by setting the parameters 3981 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3982 performance can be increased by more than a factor of 50. 3983 3984 Collective 3985 3986 Input Parameters: 3987 + B - the matrix 3988 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3989 (same value is used for all local rows) 3990 . d_nnz - array containing the number of nonzeros in the various rows of the 3991 DIAGONAL portion of the local submatrix (possibly different for each row) 3992 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3993 The size of this array is equal to the number of local rows, i.e 'm'. 3994 For matrices that will be factored, you must leave room for (and set) 3995 the diagonal entry even if it is zero. 3996 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
        submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
        OFF-DIAGONAL portion of the local submatrix (possibly different for
        each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
        structure. The size of this array is equal to the number
        of local rows, i.e. 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)) is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to. This is an m x n matrix. In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was,
   for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let us assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; for example, proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is exact.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
   in standard CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
     The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

     The i and j indices are 0 based, and the entries of i are offsets into the local j array.

     The format which is used for the sparse matrix input is equivalent to a
     row-major ordering;
i.e for the following matrix, the input data expected is 4149 as shown 4150 4151 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays 4152 4153 $ 1 0 0 4154 $ 2 0 3 P0 4155 $ ------- 4156 $ 4 5 6 P1 4157 $ 4158 $ Process0 [P0]: rows_owned=[0,1] 4159 $ i = {0,1,3} [size = nrow+1 = 2+1] 4160 $ j = {0,0,2} [size = 3] 4161 $ v = {1,2,3} [size = 3] 4162 $ 4163 $ Process1 [P1]: rows_owned=[2] 4164 $ i = {0,3} [size = nrow+1 = 1+1] 4165 $ j = {0,1,2} [size = 3] 4166 $ v = {4,5,6} [size = 3] 4167 4168 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4169 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4170 @*/ 4171 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4172 { 4173 PetscErrorCode ierr; 4174 4175 PetscFunctionBegin; 4176 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4177 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4178 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4179 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4180 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4181 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4182 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4183 PetscFunctionReturn(0); 4184 } 4185 4186 /*@ 4187 MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard 4188 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical 4189 4190 Collective 4191 4192 Input Parameters: 4193 + mat - the matrix 4194 . m - number of local rows (Cannot be PETSC_DECIDE) 4195 . n - This value should be the same as the local size used in creating the 4196 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4197 calculated if N is given) For square matrices n is almost always m. 4198 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4199 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4200 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4201 . 
J - column indices
-  v - matrix values

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
@*/
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscErrorCode  ierr;
  PetscInt        cstart,nnz,i,j;
  PetscInt        *ld;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ      *Aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ      *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data;
  PetscScalar     *ad  = Ad->a, *ao = Ao->a;
  const PetscInt  *Adi = Ad->i;
  PetscInt        ldi,Iii,md;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
  if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");

  cstart = mat->cmap->rstart;
  if (!Aij->ld) {
    /* count number of entries below block diagonal */
    ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
    Aij->ld = ld;
    for (i=0; i<m; i++) {
      nnz = Ii[i+1] - Ii[i];
      j   = 0;
      while (j < nnz && J[j] < cstart) j++; /* check j < nnz first so we never read past the end of this row */
      J    += nnz;
      ld[i] = j;
    }
  } else {
    ld = Aij->ld;
  }

  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    Iii = Ii[i];
    ldi = ld[i];
    md  = Adi[i+1] - Adi[i];
    ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
    ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
    ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
    ad += md;
    ao += nnz - md;
  }
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(0);
}

/*@C
   MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.
M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4283 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4284 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4285 (same value is used for all local rows) 4286 . d_nnz - array containing the number of nonzeros in the various rows of the 4287 DIAGONAL portion of the local submatrix (possibly different for each row) 4288 or NULL, if d_nz is used to specify the nonzero structure. 4289 The size of this array is equal to the number of local rows, i.e 'm'. 4290 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4291 submatrix (same value is used for all local rows). 4292 - o_nnz - array containing the number of nonzeros in the various rows of the 4293 OFF-DIAGONAL portion of the local submatrix (possibly different for 4294 each row) or NULL, if o_nz is used to specify the nonzero 4295 structure. The size of this array is equal to the number 4296 of local rows, i.e 'm'. 4297 4298 Output Parameter: 4299 . A - the matrix 4300 4301 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4302 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4303 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4304 4305 Notes: 4306 If the *_nnz parameter is given then the *_nz parameter is ignored 4307 4308 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4309 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4310 storage requirements for this matrix. 4311 4312 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4313 processor than it must be used on all processors that share the object for 4314 that argument. 4315 4316 The user MUST specify either the local or global matrix dimensions 4317 (possibly both). 4318 4319 The parallel matrix is partitioned across processors such that the 4320 first m0 rows belong to process 0, the next m1 rows belong to 4321 process 1, the next m2 rows belong to process 2 etc.. where 4322 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4323 values corresponding to [m x N] submatrix. 4324 4325 The columns are logically partitioned with the n0 columns belonging 4326 to 0th partition, the next n1 columns belonging to the next 4327 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4328 4329 The DIAGONAL portion of the local submatrix on any given processor 4330 is the submatrix corresponding to the rows and columns m,n 4331 corresponding to the given processor. i.e diagonal matrix on 4332 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4333 etc. The remaining portion of the local submatrix [m x (N-n)] 4334 constitute the OFF-DIAGONAL portion. The example below better 4335 illustrates this concept. 4336 4337 For a square global matrix we define each processor's diagonal portion 4338 to be its local rows and the corresponding columns (a square submatrix); 4339 each processor's off-diagonal portion encompasses the remainder of the 4340 local matrix (a rectangular submatrix). 4341 4342 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4343 4344 When calling this routine with a single process communicator, a matrix of 4345 type SEQAIJ is returned. 
If a matrix of type MPIAIJ is desired for this 4346 type of communicator, use the construction mechanism 4347 .vb 4348 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4349 .ve 4350 4351 $ MatCreate(...,&A); 4352 $ MatSetType(A,MATMPIAIJ); 4353 $ MatSetSizes(A, m,n,M,N); 4354 $ MatMPIAIJSetPreallocation(A,...); 4355 4356 By default, this format uses inodes (identical nodes) when possible. 4357 We search for consecutive rows with the same nonzero structure, thereby 4358 reusing matrix information to achieve increased efficiency. 4359 4360 Options Database Keys: 4361 + -mat_no_inode - Do not use inodes 4362 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4363 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices. 4364 See viewer types in the manual page of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter to be viewed as a matrix. 4365 Entry (i,j) is the size of the message (in bytes) rank i sends to rank j in one MatMult() call. 4366 4367 Example usage: 4368 4369 Consider the following 8x8 matrix with 34 non-zero values, that is 4370 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4371 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 4372 as follows 4373 4374 .vb 4375 1 2 0 | 0 3 0 | 0 4 4376 Proc0 0 5 6 | 7 0 0 | 8 0 4377 9 0 10 | 11 0 0 | 12 0 4378 ------------------------------------- 4379 13 0 14 | 15 16 17 | 0 0 4380 Proc1 0 18 0 | 19 20 21 | 0 0 4381 0 0 0 | 22 23 0 | 24 0 4382 ------------------------------------- 4383 Proc2 25 26 27 | 0 0 28 | 29 0 4384 30 0 0 | 31 32 33 | 0 34 4385 .ve 4386 4387 This can be represented as a collection of submatrices as 4388 4389 .vb 4390 A B C 4391 D E F 4392 G H I 4393 .ve 4394 4395 Where the submatrices A,B,C are owned by proc0, D,E,F are 4396 owned by proc1, and G,H,I are owned by proc2. 4397 4398 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4399 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4400 The 'M','N' parameters are 8,8, and have the same values on all procs. 4401 4402 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4403 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4404 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4405 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 4406 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 4407 matrix, and [DF] as another SeqAIJ matrix. 4408 4409 When the d_nz, o_nz parameters are specified, d_nz storage elements are 4410 allocated for every row of the local diagonal submatrix, and o_nz 4411 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4412 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 4413 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4414 In this case, the values of d_nz,o_nz are 4415 .vb 4416 proc0 : d_nz = 2, o_nz = 2 4417 proc1 : d_nz = 3, o_nz = 2 4418 proc2 : d_nz = 1, o_nz = 4 4419 .ve 4420 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4421 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 4422 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4423 34 values. 4424 4425 When the d_nnz, o_nnz parameters are specified, the storage is specified 4426 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4427 In the above case the values for d_nnz,o_nnz are 4428 .vb 4429 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4430 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4431 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4432 .ve 4433 Here the space allocated is the sum of all the above values, i.e. 34, and 4434 hence the pre-allocation is perfect. 4435 4436 Level: intermediate 4437 4438 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4439 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4440 @*/ 4441 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4442 { 4443 PetscErrorCode ierr; 4444 PetscMPIInt size; 4445 4446 PetscFunctionBegin; 4447 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4448 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4449 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4450 if (size > 1) { 4451 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4452 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4453 } else { 4454 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4455 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4456 } 4457 PetscFunctionReturn(0); 4458 } 4459 4460 /*@C 4461 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4462 4463 Not collective 4464 4465 Input Parameter: 4466 . A - The MPIAIJ matrix 4467 4468 Output Parameters: 4469 + Ad - The local diagonal block as a SeqAIJ matrix 4470 . Ao - The local off-diagonal block as a SeqAIJ matrix 4471 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4472 4473 Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4474 in Ad are in [0, Nc), where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is 4475 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4476 local column numbers to global column numbers in the original matrix.
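   A minimal usage sketch (assuming A is an assembled MATMPIAIJ matrix; error checking omitted):
.vb
      Mat            Ad,Ao;
      const PetscInt *colmap;
      PetscInt       j,nco;

      MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
      MatGetSize(Ao,NULL,&nco);     /* nco = number of nonzero off-diagonal columns */
      for (j=0; j<nco; j++) {
        /* column j of Ao corresponds to global column colmap[j] of A */
      }
.ve
   The returned Ad, Ao and colmap are internal to A and must not be destroyed or freed by the caller.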
4477 4478 Level: intermediate 4479 4480 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ 4481 @*/ 4482 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4483 { 4484 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4485 PetscBool flg; 4486 PetscErrorCode ierr; 4487 4488 PetscFunctionBegin; 4489 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4490 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4491 if (Ad) *Ad = a->A; 4492 if (Ao) *Ao = a->B; 4493 if (colmap) *colmap = a->garray; 4494 PetscFunctionReturn(0); 4495 } 4496 4497 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4498 { 4499 PetscErrorCode ierr; 4500 PetscInt m,N,i,rstart,nnz,Ii; 4501 PetscInt *indx; 4502 PetscScalar *values; 4503 MatType rootType; 4504 4505 PetscFunctionBegin; 4506 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4507 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4508 PetscInt *dnz,*onz,sum,bs,cbs; 4509 4510 if (n == PETSC_DECIDE) { 4511 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4512 } 4513 /* Check sum(n) = N */ 4514 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 4515 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4516 4517 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 4518 rstart -= m; 4519 4520 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4521 for (i=0; i<m; i++) { 4522 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4523 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4524 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4525 } 4526 4527 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4528 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4529 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4530 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4531 ierr = MatGetRootType_Private(inmat,&rootType);CHKERRQ(ierr); 4532 ierr = MatSetType(*outmat,rootType);CHKERRQ(ierr); 4533 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4534 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4535 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4536 ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 4537 } 4538 4539 /* numeric phase */ 4540 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4541 for (i=0; i<m; i++) { 4542 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4543 Ii = i + rstart; 4544 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4545 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4546 } 4547 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4548 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4549 PetscFunctionReturn(0); 4550 } 4551 4552 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4553 { 4554 PetscErrorCode ierr; 4555 PetscMPIInt rank; 4556 PetscInt m,N,i,rstart,nnz; 4557 size_t len; 4558 const PetscInt *indx; 4559 PetscViewer out; 4560 char *name; 4561 Mat B; 4562 const PetscScalar *values; 4563 4564 PetscFunctionBegin; 4565 ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr); 4566 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 4567 /* 
Should this be the type of the diagonal block of A? */ 4568 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4569 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4570 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4571 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4572 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4573 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 4574 for (i=0; i<m; i++) { 4575 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4576 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4577 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4578 } 4579 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4580 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4581 4582 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr); 4583 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4584 ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr); 4585 ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr); 4586 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4587 ierr = PetscFree(name);CHKERRQ(ierr); 4588 ierr = MatView(B,out);CHKERRQ(ierr); 4589 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4590 ierr = MatDestroy(&B);CHKERRQ(ierr); 4591 PetscFunctionReturn(0); 4592 } 4593 4594 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4595 { 4596 PetscErrorCode ierr; 4597 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4598 4599 PetscFunctionBegin; 4600 if (!merge) PetscFunctionReturn(0); 4601 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4602 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4603 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4604 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4605 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4606 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4607 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4608 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4609 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4610 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4611 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4612 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4613 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4614 ierr = PetscFree(merge);CHKERRQ(ierr); 4615 PetscFunctionReturn(0); 4616 } 4617 4618 #include <../src/mat/utils/freespace.h> 4619 #include <petscbt.h> 4620 4621 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4622 { 4623 PetscErrorCode ierr; 4624 MPI_Comm comm; 4625 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4626 PetscMPIInt size,rank,taga,*len_s; 4627 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4628 PetscInt proc,m; 4629 PetscInt **buf_ri,**buf_rj; 4630 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4631 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4632 MPI_Request *s_waits,*r_waits; 4633 MPI_Status *status; 4634 MatScalar *aa=a->a; 4635 MatScalar **abuf_r,*ba_i; 4636 Mat_Merge_SeqsToMPI *merge; 4637 PetscContainer container; 4638 4639 PetscFunctionBegin; 4640 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4641 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4642 4643 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4644 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 4645 4646 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4647 if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created 
from MatCreateMPIAIJSumSeqAIJSymbolic"); 4648 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4649 4650 bi = merge->bi; 4651 bj = merge->bj; 4652 buf_ri = merge->buf_ri; 4653 buf_rj = merge->buf_rj; 4654 4655 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4656 owners = merge->rowmap->range; 4657 len_s = merge->len_s; 4658 4659 /* send and recv matrix values */ 4660 /*-----------------------------*/ 4661 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4662 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4663 4664 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4665 for (proc=0,k=0; proc<size; proc++) { 4666 if (!len_s[proc]) continue; 4667 i = owners[proc]; 4668 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRMPI(ierr); 4669 k++; 4670 } 4671 4672 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRMPI(ierr);} 4673 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRMPI(ierr);} 4674 ierr = PetscFree(status);CHKERRQ(ierr); 4675 4676 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4677 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4678 4679 /* insert mat values of mpimat */ 4680 /*----------------------------*/ 4681 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4682 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4683 4684 for (k=0; k<merge->nrecv; k++) { 4685 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4686 nrows = *(buf_ri_k[k]); 4687 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4688 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4689 } 4690 4691 /* set values of ba */ 4692 m = merge->rowmap->n; 4693 for (i=0; i<m; i++) { 4694 arow = owners[rank] + i; 4695 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4696 bnzi = bi[i+1] - bi[i]; 4697 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4698 4699 /* add local non-zero vals of this proc's seqmat into ba */ 4700 anzi = ai[arow+1] - ai[arow]; 4701 aj = a->j + ai[arow]; 4702 aa = a->a + ai[arow]; 4703 nextaj = 0; 4704 for (j=0; nextaj<anzi; j++) { 4705 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4706 ba_i[j] += aa[nextaj++]; 4707 } 4708 } 4709 4710 /* add received vals into ba */ 4711 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4712 /* i-th row */ 4713 if (i == *nextrow[k]) { 4714 anzi = *(nextai[k]+1) - *nextai[k]; 4715 aj = buf_rj[k] + *(nextai[k]); 4716 aa = abuf_r[k] + *(nextai[k]); 4717 nextaj = 0; 4718 for (j=0; nextaj<anzi; j++) { 4719 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4720 ba_i[j] += aa[nextaj++]; 4721 } 4722 } 4723 nextrow[k]++; nextai[k]++; 4724 } 4725 } 4726 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4727 } 4728 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4729 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4730 4731 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4732 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4733 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4734 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4735 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4736 PetscFunctionReturn(0); 4737 } 4738 4739 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4740 { 4741 PetscErrorCode 
ierr; 4742 Mat B_mpi; 4743 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4744 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4745 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4746 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4747 PetscInt len,proc,*dnz,*onz,bs,cbs; 4748 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4749 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4750 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4751 MPI_Status *status; 4752 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4753 PetscBT lnkbt; 4754 Mat_Merge_SeqsToMPI *merge; 4755 PetscContainer container; 4756 4757 PetscFunctionBegin; 4758 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4759 4760 /* make sure it is a PETSc comm */ 4761 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4762 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4763 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 4764 4765 ierr = PetscNew(&merge);CHKERRQ(ierr); 4766 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4767 4768 /* determine row ownership */ 4769 /*---------------------------------------------------------*/ 4770 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4771 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4772 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4773 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4774 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4775 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4776 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4777 4778 m = merge->rowmap->n; 4779 owners = merge->rowmap->range; 4780 4781 /* determine the number of messages to send, their lengths */ 4782 /*---------------------------------------------------------*/ 4783 len_s = merge->len_s; 4784 4785 len = 0; /* length of buf_si[] */ 4786 merge->nsend = 0; 4787 for (proc=0; proc<size; proc++) { 4788 len_si[proc] = 0; 4789 if (proc == rank) { 4790 len_s[proc] = 0; 4791 } else { 4792 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4793 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4794 } 4795 if (len_s[proc]) { 4796 merge->nsend++; 4797 nrows = 0; 4798 for (i=owners[proc]; i<owners[proc+1]; i++) { 4799 if (ai[i+1] > ai[i]) nrows++; 4800 } 4801 len_si[proc] = 2*(nrows+1); 4802 len += len_si[proc]; 4803 } 4804 } 4805 4806 /* determine the number and length of messages to receive for ij-structure */ 4807 /*-------------------------------------------------------------------------*/ 4808 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4809 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4810 4811 /* post the Irecv of j-structure */ 4812 /*-------------------------------*/ 4813 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4814 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4815 4816 /* post the Isend of j-structure */ 4817 /*--------------------------------*/ 4818 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4819 4820 for (proc=0, k=0; proc<size; proc++) { 4821 if (!len_s[proc]) continue; 4822 i = owners[proc]; 4823 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRMPI(ierr); 4824 k++; 4825 } 4826 4827 /* receives and sends of j-structure are complete */ 4828 
/*------------------------------------------------*/ 4829 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRMPI(ierr);} 4830 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRMPI(ierr);} 4831 4832 /* send and recv i-structure */ 4833 /*---------------------------*/ 4834 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4835 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4836 4837 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4838 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4839 for (proc=0,k=0; proc<size; proc++) { 4840 if (!len_s[proc]) continue; 4841 /* form outgoing message for i-structure: 4842 buf_si[0]: nrows to be sent 4843 [1:nrows]: row index (global) 4844 [nrows+1:2*nrows+1]: i-structure index 4845 */ 4846 /*-------------------------------------------*/ 4847 nrows = len_si[proc]/2 - 1; 4848 buf_si_i = buf_si + nrows+1; 4849 buf_si[0] = nrows; 4850 buf_si_i[0] = 0; 4851 nrows = 0; 4852 for (i=owners[proc]; i<owners[proc+1]; i++) { 4853 anzi = ai[i+1] - ai[i]; 4854 if (anzi) { 4855 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4856 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4857 nrows++; 4858 } 4859 } 4860 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRMPI(ierr); 4861 k++; 4862 buf_si += len_si[proc]; 4863 } 4864 4865 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRMPI(ierr);} 4866 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRMPI(ierr);} 4867 4868 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4869 for (i=0; i<merge->nrecv; i++) { 4870 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4871 } 4872 4873 ierr = PetscFree(len_si);CHKERRQ(ierr); 4874 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4875 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4876 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4877 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4878 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4879 ierr = PetscFree(status);CHKERRQ(ierr); 4880 4881 /* compute a local seq matrix in each processor */ 4882 /*----------------------------------------------*/ 4883 /* allocate bi array and free space for accumulating nonzero column info */ 4884 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4885 bi[0] = 0; 4886 4887 /* create and initialize a linked list */ 4888 nlnk = N+1; 4889 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4890 4891 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4892 len = ai[owners[rank+1]] - ai[owners[rank]]; 4893 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4894 4895 current_space = free_space; 4896 4897 /* determine symbolic info for each local row */ 4898 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4899 4900 for (k=0; k<merge->nrecv; k++) { 4901 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4902 nrows = *buf_ri_k[k]; 4903 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4904 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4905 } 4906 4907 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4908 len = 0; 4909 for (i=0; i<m; i++) { 4910 bnzi = 0; 4911 /* add local non-zero cols of this proc's 
seqmat into lnk */ 4912 arow = owners[rank] + i; 4913 anzi = ai[arow+1] - ai[arow]; 4914 aj = a->j + ai[arow]; 4915 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4916 bnzi += nlnk; 4917 /* add received col data into lnk */ 4918 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4919 if (i == *nextrow[k]) { /* i-th row */ 4920 anzi = *(nextai[k]+1) - *nextai[k]; 4921 aj = buf_rj[k] + *nextai[k]; 4922 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4923 bnzi += nlnk; 4924 nextrow[k]++; nextai[k]++; 4925 } 4926 } 4927 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4928 4929 /* if free space is not available, make more free space */ 4930 if (current_space->local_remaining<bnzi) { 4931 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr); 4932 nspacedouble++; 4933 } 4934 /* copy data into free space, then initialize lnk */ 4935 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4936 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4937 4938 current_space->array += bnzi; 4939 current_space->local_used += bnzi; 4940 current_space->local_remaining -= bnzi; 4941 4942 bi[i+1] = bi[i] + bnzi; 4943 } 4944 4945 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4946 4947 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4948 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4949 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4950 4951 /* create symbolic parallel matrix B_mpi */ 4952 /*---------------------------------------*/ 4953 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4954 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4955 if (n==PETSC_DECIDE) { 4956 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4957 } else { 4958 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4959 } 4960 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4961 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4962 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4963 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4964 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4965 4966 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4967 B_mpi->assembled = PETSC_FALSE; 4968 merge->bi = bi; 4969 merge->bj = bj; 4970 merge->buf_ri = buf_ri; 4971 merge->buf_rj = buf_rj; 4972 merge->coi = NULL; 4973 merge->coj = NULL; 4974 merge->owners_co = NULL; 4975 4976 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4977 4978 /* attach the supporting struct to B_mpi for reuse */ 4979 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4980 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4981 ierr = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr); 4982 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4983 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4984 *mpimat = B_mpi; 4985 4986 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4987 PetscFunctionReturn(0); 4988 } 4989 4990 /*@C 4991 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4992 matrices from each processor 4993 4994 Collective 4995 4996 Input Parameters: 4997 + comm - the communicator the parallel matrix will live on 4998 . seqmat - the input sequential matrices 4999 .
m - number of local rows (or PETSC_DECIDE) 5000 . n - number of local columns (or PETSC_DECIDE) 5001 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5002 5003 Output Parameter: 5004 . mpimat - the parallel matrix generated 5005 5006 Level: advanced 5007 5008 Notes: 5009 The dimensions of the sequential matrix in each processor MUST be the same. 5010 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 5011 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 5012 @*/ 5013 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 5014 { 5015 PetscErrorCode ierr; 5016 PetscMPIInt size; 5017 5018 PetscFunctionBegin; 5019 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 5020 if (size == 1) { 5021 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5022 if (scall == MAT_INITIAL_MATRIX) { 5023 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 5024 } else { 5025 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5026 } 5027 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5028 PetscFunctionReturn(0); 5029 } 5030 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5031 if (scall == MAT_INITIAL_MATRIX) { 5032 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5033 } 5034 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5035 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5036 PetscFunctionReturn(0); 5037 } 5038 5039 /*@ 5040 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5041 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5042 with MatGetSize() 5043 5044 Not Collective 5045 5046 Input Parameters: 5047 + A - the matrix 5048 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5049 5050 Output Parameter: 5051 . A_loc - the local sequential matrix generated 5052 5053 Level: developer 5054 5055 Notes: 5056 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 5057 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 5058 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 5059 modify the values of the returned A_loc.
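   A typical calling sequence, sketched below assuming A is an assembled MATMPIAIJ matrix whose values (but not nonzero pattern) change between calls:
.vb
      Mat A_loc;

      MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);   /* gather the local rows of A into a SeqAIJ matrix */
      /* ... use A_loc, then modify the numerical values of A ... */
      MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);     /* refresh the values of A_loc */
      MatDestroy(&A_loc);
.ve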
5060 5061 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge() 5062 @*/ 5063 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5064 { 5065 PetscErrorCode ierr; 5066 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5067 Mat_SeqAIJ *mat,*a,*b; 5068 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5069 const PetscScalar *aa,*ba,*aav,*bav; 5070 PetscScalar *ca,*cam; 5071 PetscMPIInt size; 5072 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5073 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5074 PetscBool match; 5075 5076 PetscFunctionBegin; 5077 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5078 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5079 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 5080 if (size == 1) { 5081 if (scall == MAT_INITIAL_MATRIX) { 5082 ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr); 5083 *A_loc = mpimat->A; 5084 } else if (scall == MAT_REUSE_MATRIX) { 5085 ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5086 } 5087 PetscFunctionReturn(0); 5088 } 5089 5090 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5091 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5092 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5093 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5094 ierr = MatSeqAIJGetArrayRead(mpimat->A,&aav);CHKERRQ(ierr); 5095 ierr = MatSeqAIJGetArrayRead(mpimat->B,&bav);CHKERRQ(ierr); 5096 aa = aav; 5097 ba = bav; 5098 if (scall == MAT_INITIAL_MATRIX) { 5099 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5100 ci[0] = 0; 5101 for (i=0; i<am; i++) { 5102 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5103 } 5104 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5105 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5106 k = 0; 5107 for (i=0; i<am; i++) { 5108 ncols_o = bi[i+1] - bi[i]; 5109 ncols_d = ai[i+1] - ai[i]; 5110 /* off-diagonal portion of A */ 5111 for (jo=0; jo<ncols_o; jo++) { 5112 col = cmap[*bj]; 5113 if (col >= cstart) break; 5114 cj[k] = col; bj++; 5115 ca[k++] = *ba++; 5116 } 5117 /* diagonal portion of A */ 5118 for (j=0; j<ncols_d; j++) { 5119 cj[k] = cstart + *aj++; 5120 ca[k++] = *aa++; 5121 } 5122 /* off-diagonal portion of A */ 5123 for (j=jo; j<ncols_o; j++) { 5124 cj[k] = cmap[*bj++]; 5125 ca[k++] = *ba++; 5126 } 5127 } 5128 /* put together the new matrix */ 5129 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5130 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5131 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5132 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5133 mat->free_a = PETSC_TRUE; 5134 mat->free_ij = PETSC_TRUE; 5135 mat->nonew = 0; 5136 } else if (scall == MAT_REUSE_MATRIX) { 5137 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5138 #if defined(PETSC_HAVE_DEVICE) 5139 (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU; 5140 #endif 5141 ci = mat->i; cj = mat->j; cam = mat->a; 5142 for (i=0; i<am; i++) { 5143 /* off-diagonal portion of A */ 5144 ncols_o = bi[i+1] - bi[i]; 5145 for (jo=0; jo<ncols_o; jo++) { 5146 col = cmap[*bj]; 5147 if (col >= cstart) break; 5148 *cam++ = *ba++; bj++; 5149 } 5150 /* diagonal portion of A */ 5151 ncols_d = ai[i+1] - ai[i]; 5152 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5153 /* off-diagonal portion of A */ 5154 for (j=jo; j<ncols_o; j++) { 5155 *cam++ = *ba++; bj++; 5156 } 5157 } 5158 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5159 ierr = MatSeqAIJRestoreArrayRead(mpimat->A,&aav);CHKERRQ(ierr); 5160 ierr = MatSeqAIJRestoreArrayRead(mpimat->B,&bav);CHKERRQ(ierr); 5161 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5162 PetscFunctionReturn(0); 5163 } 5164 5165 /*@ 5166 MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5167 mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and off-diagonal parts 5168 5169 Not Collective 5170 5171 Input Parameters: 5172 + A - the matrix 5173 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5174 5175 Output Parameters: 5176 + glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5177 - A_loc - the local sequential matrix generated 5178 5179 Level: developer 5180 5181 Notes: 5182 This is different from MatMPIAIJGetLocalMat() since the first columns in the returned matrix are those associated with the diagonal part, followed by those associated with the off-diagonal part (in its local ordering) 5183 5184 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed() 5185 5186 @*/ 5187 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc) 5188 { 5189 PetscErrorCode ierr; 5190 Mat Ao,Ad; 5191 const PetscInt *cmap; 5192 PetscMPIInt size; 5193 PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*); 5194 5195 PetscFunctionBegin; 5196 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap);CHKERRQ(ierr); 5197 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 5198 if (size == 1) { 5199 if (scall == MAT_INITIAL_MATRIX) { 5200 ierr = PetscObjectReference((PetscObject)Ad);CHKERRQ(ierr); 5201 *A_loc = Ad; 5202 } else if (scall == MAT_REUSE_MATRIX) { 5203 ierr = MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5204 } 5205 if (glob) { ierr = ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob);CHKERRQ(ierr); } 5206 PetscFunctionReturn(0); 5207 } 5208 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f);CHKERRQ(ierr); 5209 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5210 if (f) { 5211 ierr = (*f)(A,scall,glob,A_loc);CHKERRQ(ierr); 5212 } else { 5213 Mat_SeqAIJ *a = (Mat_SeqAIJ*)Ad->data; 5214 Mat_SeqAIJ *b = (Mat_SeqAIJ*)Ao->data; 5215 Mat_SeqAIJ *c; 5216 PetscInt *ai = a->i, *aj = a->j; 5217 PetscInt *bi = b->i, *bj = b->j; 5218 PetscInt *ci,*cj; 5219 const PetscScalar *aa,*ba; 5220 PetscScalar *ca; 5221 PetscInt i,j,am,dn,on; 5222 5223 ierr =
MatGetLocalSize(Ad,&am,&dn);CHKERRQ(ierr); 5224 ierr = MatGetLocalSize(Ao,NULL,&on);CHKERRQ(ierr); 5225 ierr = MatSeqAIJGetArrayRead(Ad,&aa);CHKERRQ(ierr); 5226 ierr = MatSeqAIJGetArrayRead(Ao,&ba);CHKERRQ(ierr); 5227 if (scall == MAT_INITIAL_MATRIX) { 5228 PetscInt k; 5229 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5230 ierr = PetscMalloc1(ai[am]+bi[am],&cj);CHKERRQ(ierr); 5231 ierr = PetscMalloc1(ai[am]+bi[am],&ca);CHKERRQ(ierr); 5232 ci[0] = 0; 5233 for (i=0,k=0; i<am; i++) { 5234 const PetscInt ncols_o = bi[i+1] - bi[i]; 5235 const PetscInt ncols_d = ai[i+1] - ai[i]; 5236 ci[i+1] = ci[i] + ncols_o + ncols_d; 5237 /* diagonal portion of A */ 5238 for (j=0; j<ncols_d; j++,k++) { 5239 cj[k] = *aj++; 5240 ca[k] = *aa++; 5241 } 5242 /* off-diagonal portion of A */ 5243 for (j=0; j<ncols_o; j++,k++) { 5244 cj[k] = dn + *bj++; 5245 ca[k] = *ba++; 5246 } 5247 } 5248 /* put together the new matrix */ 5249 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc);CHKERRQ(ierr); 5250 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5251 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5252 c = (Mat_SeqAIJ*)(*A_loc)->data; 5253 c->free_a = PETSC_TRUE; 5254 c->free_ij = PETSC_TRUE; 5255 c->nonew = 0; 5256 ierr = MatSetType(*A_loc,((PetscObject)Ad)->type_name);CHKERRQ(ierr); 5257 } else if (scall == MAT_REUSE_MATRIX) { 5258 #if defined(PETSC_HAVE_DEVICE) 5259 (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU; 5260 #endif 5261 c = (Mat_SeqAIJ*)(*A_loc)->data; 5262 ca = c->a; 5263 for (i=0; i<am; i++) { 5264 const PetscInt ncols_d = ai[i+1] - ai[i]; 5265 const PetscInt ncols_o = bi[i+1] - bi[i]; 5266 /* diagonal portion of A */ 5267 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 5268 /* off-diagonal portion of A */ 5269 for (j=0; j<ncols_o; j++) *ca++ = *ba++; 5270 } 5271 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5272 ierr = MatSeqAIJRestoreArrayRead(Ad,&aa);CHKERRQ(ierr); 5273 ierr = MatSeqAIJRestoreArrayRead(Ao,&ba);CHKERRQ(ierr); 5274 if (glob) { 5275 PetscInt cst, *gidx; 5276 5277 ierr = MatGetOwnershipRangeColumn(A,&cst,NULL);CHKERRQ(ierr); 5278 ierr = PetscMalloc1(dn+on,&gidx);CHKERRQ(ierr); 5279 for (i=0; i<dn; i++) gidx[i] = cst + i; 5280 for (i=0; i<on; i++) gidx[i+dn] = cmap[i]; 5281 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob);CHKERRQ(ierr); 5282 } 5283 } 5284 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5285 PetscFunctionReturn(0); 5286 } 5287 5288 /*@C 5289 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5290 5291 Not Collective 5292 5293 Input Parameters: 5294 + A - the matrix 5295 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5296 - row, col - index sets of rows and columns to extract (or NULL) 5297 5298 Output Parameter: 5299 .
A_loc - the local sequential matrix generated 5300 5301 Level: developer 5302 5303 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5304 5305 @*/ 5306 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5307 { 5308 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5309 PetscErrorCode ierr; 5310 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5311 IS isrowa,iscola; 5312 Mat *aloc; 5313 PetscBool match; 5314 5315 PetscFunctionBegin; 5316 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5317 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5318 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5319 if (!row) { 5320 start = A->rmap->rstart; end = A->rmap->rend; 5321 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5322 } else { 5323 isrowa = *row; 5324 } 5325 if (!col) { 5326 start = A->cmap->rstart; 5327 cmap = a->garray; 5328 nzA = a->A->cmap->n; 5329 nzB = a->B->cmap->n; 5330 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5331 ncols = 0; 5332 for (i=0; i<nzB; i++) { 5333 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5334 else break; 5335 } 5336 imark = i; 5337 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5338 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5339 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5340 } else { 5341 iscola = *col; 5342 } 5343 if (scall != MAT_INITIAL_MATRIX) { 5344 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5345 aloc[0] = *A_loc; 5346 } 5347 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5348 if (!col) { /* attach global id of condensed columns */ 5349 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5350 } 5351 *A_loc = aloc[0]; 5352 ierr = PetscFree(aloc);CHKERRQ(ierr); 5353 if (!row) { 5354 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5355 } 5356 if (!col) { 5357 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5358 } 5359 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5360 PetscFunctionReturn(0); 5361 } 5362 5363 /* 5364 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5365 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5366 * on a global size. 
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5369 { 5370 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5371 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5372 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5373 PetscMPIInt owner; 5374 PetscSFNode *iremote,*oiremote; 5375 const PetscInt *lrowindices; 5376 PetscErrorCode ierr; 5377 PetscSF sf,osf; 5378 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5379 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5380 MPI_Comm comm; 5381 ISLocalToGlobalMapping mapping; 5382 5383 PetscFunctionBegin; 5384 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr); 5385 /* plocalsize is the number of roots 5386 * nrows is the number of leaves 5387 * */ 5388 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr); 5389 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr); 5390 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr); 5391 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr); 5392 for (i=0;i<nrows;i++) { 5393 /* Find a remote index and an owner for a row 5394 * The row could be local or remote 5395 * */ 5396 owner = 0; 5397 lidx = 0; 5398 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr); 5399 iremote[i].index = lidx; 5400 iremote[i].rank = owner; 5401 } 5402 /* Create an SF to communicate the number of nonzero columns for each row */ 5403 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5404 /* The SF will figure out the number of nonzero columns for each row, and their 5405 * offsets 5406 * */ 5407 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5408 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5409 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5410 5411 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); 5412 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); 5413 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); 5414 roffsets[0] = 0; 5415 roffsets[1] = 0; 5416 for (i=0;i<plocalsize;i++) { 5417 /* diag */ 5418 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5419 /* off diag */ 5420 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5421 /* compute offsets so that we know the relative location of each row */ 5422 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5423 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5424 } 5425 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr); 5426 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr); 5427 /* 'r' means root, and 'l' means leaf */ 5428 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr); 5429 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr); 5430 ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr); 5431 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr); 5432 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5433 ierr = PetscFree(roffsets);CHKERRQ(ierr); 5434 ierr = PetscFree(nrcols);CHKERRQ(ierr); 5435 dntotalcols = 0; 5436 ontotalcols = 0; 5437 ncol = 0; 5438 for (i=0;i<nrows;i++) { 5439 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5440 ncol = PetscMax(pnnz[i],ncol); 5441 /* diag */ 5442 dntotalcols += nlcols[i*2+0]; 5443 /* off diag */ 5444 ontotalcols += nlcols[i*2+1]; 5445 } 5446 /* We do not need to figure out the exact number of columns 5447 * since all the calculations will be done by going through the raw data 5448 * */ 5449 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr); 5450 ierr =
MatSetUp(*P_oth);CHKERRQ(ierr); 5451 ierr = PetscFree(pnnz);CHKERRQ(ierr); 5452 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5453 /* diag */ 5454 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr); 5455 /* off diag */ 5456 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr); 5457 /* diag */ 5458 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr); 5459 /* off diag */ 5460 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr); 5461 dntotalcols = 0; 5462 ontotalcols = 0; 5463 ntotalcols = 0; 5464 for (i=0;i<nrows;i++) { 5465 owner = 0; 5466 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr); 5467 /* Set iremote for diag matrix */ 5468 for (j=0;j<nlcols[i*2+0];j++) { 5469 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5470 iremote[dntotalcols].rank = owner; 5471 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5472 ilocal[dntotalcols++] = ntotalcols++; 5473 } 5474 /* off diag */ 5475 for (j=0;j<nlcols[i*2+1];j++) { 5476 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5477 oiremote[ontotalcols].rank = owner; 5478 oilocal[ontotalcols++] = ntotalcols++; 5479 } 5480 } 5481 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr); 5482 ierr = PetscFree(loffsets);CHKERRQ(ierr); 5483 ierr = PetscFree(nlcols);CHKERRQ(ierr); 5484 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5485 /* P serves as roots and P_oth is leaves 5486 * Diag matrix 5487 * */ 5488 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5489 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5490 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5491 5492 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr); 5493 /* Off diag */ 5494 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5495 ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr); 5496 ierr = PetscSFSetUp(osf);CHKERRQ(ierr); 5497 /* We operate on the matrix internal data for saving memory */ 5498 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5499 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5500 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr); 5501 /* Convert to global indices for diag matrix */ 5502 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5503 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5504 /* We want P_oth store global indices */ 5505 ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr); 5506 /* Use memory scalable approach */ 5507 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr); 5508 ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); 5509 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5510 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5511 /* Convert back to local indices */ 5512 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5513 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr); 5514 nout = 0; 5515 ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr); 5516 if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout); 5517 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 
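/* At this point P_oth has received its global column indices; P's own index arrays pd->j and po->j, which were temporarily converted to global numbering for the broadcasts, have been restored above. Only the scalar broadcasts started earlier remain to be completed. */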
5518 /* Exchange values */ 5519 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5520 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5521 /* Stop PETSc from shrinking memory */ 5522 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5523 ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5524 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5525 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5526 ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5527 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5528 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5529 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5530 PetscFunctionReturn(0); 5531 } 5532 5533 /* 5534 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5535 * This supports MPIAIJ and MAIJ 5536 * */ 5537 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5538 { 5539 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5540 Mat_SeqAIJ *p_oth; 5541 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 5542 IS rows,map; 5543 PetscHMapI hamp; 5544 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5545 MPI_Comm comm; 5546 PetscSF sf,osf; 5547 PetscBool has; 5548 PetscErrorCode ierr; 5549 5550 PetscFunctionBegin; 5551 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5552 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5553 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5554 * and then create a submatrix (that often is an overlapping matrix) 5555 * */ 5556 if (reuse == MAT_INITIAL_MATRIX) { 5557 /* Use a hash table to figure out unique keys */ 5558 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5559 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5560 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5561 count = 0; 5562 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5563 for (i=0;i<a->B->cmap->n;i++) { 5564 key = a->garray[i]/dof; 5565 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5566 if (!has) { 5567 mapping[i] = count; 5568 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5569 } else { 5570 /* Current 'i' has the same value the previous step */ 5571 mapping[i] = count-1; 5572 } 5573 } 5574 ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5575 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5576 if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count); 5577 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5578 off = 0; 5579 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5580 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5581 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5582 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5583 /* In case, the matrix was already created but users want to recreate the matrix */ 5584 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5585 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5586 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5587 ierr = ISDestroy(&map);CHKERRQ(ierr); 5588 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5589 } else if 
(reuse == MAT_REUSE_MATRIX) { 5590 /* If the matrix was already created, we simply update the values using the SF objects 5591 * that were attached to the matrix earlier. 5592 * */ 5593 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5594 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5595 if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet"); 5596 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5597 /* Update values in place */ 5598 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5599 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5600 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5601 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr); 5602 } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type"); 5603 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5604 PetscFunctionReturn(0); 5605 } 5606 5607 /*@C 5608 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A 5609 5610 Collective on Mat 5611 5612 Input Parameters: 5613 + A - the first matrix in mpiaij format 5614 . B - the second matrix in mpiaij format 5615 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5616 5617 Input/Output Parameters: 5618 + rowb - index sets of rows of B to extract (or NULL), modified on output 5619 - colb - index sets of columns of B to extract (or NULL), modified on output 5620 5621 Output Parameter: 5622 . B_seq - the sequential matrix generated 5623 5624 Level: developer 5625 5626 @*/ 5627 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5628 { 5629 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5630 PetscErrorCode ierr; 5631 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5632 IS isrowb,iscolb; 5633 Mat *bseq=NULL; 5634 5635 PetscFunctionBegin; 5636 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5637 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5638 } 5639 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5640 5641 if (scall == MAT_INITIAL_MATRIX) { 5642 start = A->cmap->rstart; 5643 cmap = a->garray; 5644 nzA = a->A->cmap->n; 5645 nzB = a->B->cmap->n; 5646 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5647 ncols = 0; 5648 for (i=0; i<nzB; i++) { /* row < local row index */ 5649 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5650 else break; 5651 } 5652 imark = i; 5653 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5654 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5655 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5656 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5657 } else { 5658 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5659 isrowb = *rowb; iscolb = *colb; 5660 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5661 bseq[0] = *B_seq; 5662 } 5663 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5664 *B_seq = bseq[0]; 5665 ierr = PetscFree(bseq);CHKERRQ(ierr); 5666 if (!rowb) { 5667 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5668 } else { 5669 *rowb = isrowb; 5670 } 5671 if (!colb)
{ 5672 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5673 } else { 5674 *colb = iscolb; 5675 } 5676 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5677 PetscFunctionReturn(0); 5678 } 5679 5680 /* 5681 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5682 of the OFF-DIAGONAL portion of local A 5683 5684 Collective on Mat 5685 5686 Input Parameters: 5687 + A,B - the matrices in mpiaij format 5688 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5689 5690 Output Parameter: 5691 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5692 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5693 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5694 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5695 5696 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5697 for this matrix. This is not desirable.. 5698 5699 Level: developer 5700 5701 */ 5702 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5703 { 5704 PetscErrorCode ierr; 5705 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5706 Mat_SeqAIJ *b_oth; 5707 VecScatter ctx; 5708 MPI_Comm comm; 5709 const PetscMPIInt *rprocs,*sprocs; 5710 const PetscInt *srow,*rstarts,*sstarts; 5711 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5712 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5713 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5714 MPI_Request *reqs = NULL,*rwaits = NULL,*swaits = NULL; 5715 PetscMPIInt size,tag,rank,nreqs; 5716 5717 PetscFunctionBegin; 5718 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5719 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 5720 5721 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5722 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5723 } 5724 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5725 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 5726 5727 if (size == 1) { 5728 startsj_s = NULL; 5729 bufa_ptr = NULL; 5730 *B_oth = NULL; 5731 PetscFunctionReturn(0); 5732 } 5733 5734 ctx = a->Mvctx; 5735 tag = ((PetscObject)ctx)->tag; 5736 5737 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5738 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5739 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5740 ierr = PetscMPIIntCast(nsends+nrecvs,&nreqs);CHKERRQ(ierr); 5741 ierr = PetscMalloc1(nreqs,&reqs);CHKERRQ(ierr); 5742 rwaits = reqs; 5743 swaits = reqs + nrecvs; 5744 5745 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5746 if (scall == MAT_INITIAL_MATRIX) { 5747 /* i-array */ 5748 /*---------*/ 5749 /* post receives */ 5750 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5751 for (i=0; i<nrecvs; i++) { 5752 rowlen = rvalues + rstarts[i]*rbs; 5753 nrows = 
(rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5754 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5755 } 5756 5757 /* pack the outgoing message */ 5758 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5759 5760 sstartsj[0] = 0; 5761 rstartsj[0] = 0; 5762 len = 0; /* total length of j or a array to be sent */ 5763 if (nsends) { 5764 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5765 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5766 } 5767 for (i=0; i<nsends; i++) { 5768 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5769 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5770 for (j=0; j<nrows; j++) { 5771 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5772 for (l=0; l<sbs; l++) { 5773 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5774 5775 rowlen[j*sbs+l] = ncols; 5776 5777 len += ncols; 5778 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5779 } 5780 k++; 5781 } 5782 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5783 5784 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5785 } 5786 /* recvs and sends of i-array are completed */ 5787 if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5788 ierr = PetscFree(svalues);CHKERRQ(ierr); 5789 5790 /* allocate buffers for sending j and a arrays */ 5791 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5792 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5793 5794 /* create i-array of B_oth */ 5795 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5796 5797 b_othi[0] = 0; 5798 len = 0; /* total length of j or a array to be received */ 5799 k = 0; 5800 for (i=0; i<nrecvs; i++) { 5801 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5802 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5803 for (j=0; j<nrows; j++) { 5804 b_othi[k+1] = b_othi[k] + rowlen[j]; 5805 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5806 k++; 5807 } 5808 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5809 } 5810 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5811 5812 /* allocate space for j and a arrrays of B_oth */ 5813 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5814 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5815 5816 /* j-array */ 5817 /*---------*/ 5818 /* post receives of j-array */ 5819 for (i=0; i<nrecvs; i++) { 5820 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5821 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5822 } 5823 5824 /* pack the outgoing message j-array */ 5825 if (nsends) k = sstarts[0]; 5826 for (i=0; i<nsends; i++) { 5827 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5828 bufJ = bufj+sstartsj[i]; 5829 for (j=0; j<nrows; j++) { 5830 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5831 for (ll=0; ll<sbs; ll++) { 5832 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5833 for (l=0; l<ncols; l++) { 5834 *bufJ++ = cols[l]; 5835 } 5836 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5837 } 5838 } 5839 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5840 } 5841 5842 /* recvs and sends of j-array are completed */ 5843 if (nreqs) {ierr = 
MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5844 } else if (scall == MAT_REUSE_MATRIX) { 5845 sstartsj = *startsj_s; 5846 rstartsj = *startsj_r; 5847 bufa = *bufa_ptr; 5848 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5849 b_otha = b_oth->a; 5850 #if defined(PETSC_HAVE_DEVICE) 5851 (*B_oth)->offloadmask = PETSC_OFFLOAD_CPU; 5852 #endif 5853 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5854 5855 /* a-array */ 5856 /*---------*/ 5857 /* post receives of a-array */ 5858 for (i=0; i<nrecvs; i++) { 5859 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5860 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr); 5861 } 5862 5863 /* pack the outgoing message a-array */ 5864 if (nsends) k = sstarts[0]; 5865 for (i=0; i<nsends; i++) { 5866 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5867 bufA = bufa+sstartsj[i]; 5868 for (j=0; j<nrows; j++) { 5869 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5870 for (ll=0; ll<sbs; ll++) { 5871 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5872 for (l=0; l<ncols; l++) { 5873 *bufA++ = vals[l]; 5874 } 5875 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5876 } 5877 } 5878 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5879 } 5880 /* recvs and sends of a-array are completed */ 5881 if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5882 ierr = PetscFree(reqs);CHKERRQ(ierr); 5883 5884 if (scall == MAT_INITIAL_MATRIX) { 5885 /* put together the new matrix */ 5886 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5887 5888 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5889 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5890 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5891 b_oth->free_a = PETSC_TRUE; 5892 b_oth->free_ij = PETSC_TRUE; 5893 b_oth->nonew = 0; 5894 5895 ierr = PetscFree(bufj);CHKERRQ(ierr); 5896 if (!startsj_s || !bufa_ptr) { 5897 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5898 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5899 } else { 5900 *startsj_s = sstartsj; 5901 *startsj_r = rstartsj; 5902 *bufa_ptr = bufa; 5903 } 5904 } 5905 5906 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5907 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5908 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5909 PetscFunctionReturn(0); 5910 } 5911 5912 /*@C 5913 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5914 5915 Not Collective 5916 5917 Input Parameter: 5918 . A - The matrix in mpiaij format 5919 5920 Output Parameters: 5921 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5922 . 
colmap - A map from global column index to local index into lvec 5923 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5924 5925 Level: developer 5926 5927 @*/ 5928 #if defined(PETSC_USE_CTABLE) 5929 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5930 #else 5931 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5932 #endif 5933 { 5934 Mat_MPIAIJ *a; 5935 5936 PetscFunctionBegin; 5937 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5938 PetscValidPointer(lvec, 2); 5939 PetscValidPointer(colmap, 3); 5940 PetscValidPointer(multScatter, 4); 5941 a = (Mat_MPIAIJ*) A->data; 5942 if (lvec) *lvec = a->lvec; 5943 if (colmap) *colmap = a->colmap; 5944 if (multScatter) *multScatter = a->Mvctx; 5945 PetscFunctionReturn(0); 5946 } 5947 5948 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5949 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5950 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5951 #if defined(PETSC_HAVE_MKL_SPARSE) 5952 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5953 #endif 5954 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5955 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5956 #if defined(PETSC_HAVE_ELEMENTAL) 5957 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5958 #endif 5959 #if defined(PETSC_HAVE_SCALAPACK) 5960 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 5961 #endif 5962 #if defined(PETSC_HAVE_HYPRE) 5963 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5964 #endif 5965 #if defined(PETSC_HAVE_CUDA) 5966 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 5967 #endif 5968 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 5969 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*); 5970 #endif 5971 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5972 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5973 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 5974 5975 /* 5976 Computes (B'*A')' since computing B*A directly is untenable 5977 5978 n p p 5979 [ ] [ ] [ ] 5980 m [ A ] * n [ B ] = m [ C ] 5981 [ ] [ ] [ ] 5982 5983 */ 5984 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5985 { 5986 PetscErrorCode ierr; 5987 Mat At,Bt,Ct; 5988 5989 PetscFunctionBegin; 5990 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5991 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5992 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr); 5993 ierr = MatDestroy(&At);CHKERRQ(ierr); 5994 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5995 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5996 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5997 PetscFunctionReturn(0); 5998 } 5999 6000 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 6001 { 6002 PetscErrorCode ierr; 6003 PetscBool cisdense; 6004 6005 PetscFunctionBegin; 6006 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 6007 ierr = 
MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 6008 ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 6009 ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr); 6010 if (!cisdense) { 6011 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 6012 } 6013 ierr = MatSetUp(C);CHKERRQ(ierr); 6014 6015 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6016 PetscFunctionReturn(0); 6017 } 6018 6019 /* ----------------------------------------------------------------*/ 6020 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6021 { 6022 Mat_Product *product = C->product; 6023 Mat A = product->A,B=product->B; 6024 6025 PetscFunctionBegin; 6026 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 6027 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 6028 6029 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6030 C->ops->productsymbolic = MatProductSymbolic_AB; 6031 PetscFunctionReturn(0); 6032 } 6033 6034 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6035 { 6036 PetscErrorCode ierr; 6037 Mat_Product *product = C->product; 6038 6039 PetscFunctionBegin; 6040 if (product->type == MATPRODUCT_AB) { 6041 ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr); 6042 } 6043 PetscFunctionReturn(0); 6044 } 6045 /* ----------------------------------------------------------------*/ 6046 6047 /*MC 6048 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6049 6050 Options Database Keys: 6051 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 6052 6053 Level: beginner 6054 6055 Notes: 6056 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values; 6057 in this case the values associated with the rows and columns one passes in are set to zero 6058 in the matrix. 6059 6060 MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type.
In this case, no 6061 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored. 6062 6063 .seealso: MatCreateAIJ() 6064 M*/ 6065 6066 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6067 { 6068 Mat_MPIAIJ *b; 6069 PetscErrorCode ierr; 6070 PetscMPIInt size; 6071 6072 PetscFunctionBegin; 6073 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 6074 6075 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 6076 B->data = (void*)b; 6077 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 6078 B->assembled = PETSC_FALSE; 6079 B->insertmode = NOT_SET_VALUES; 6080 b->size = size; 6081 6082 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr); 6083 6084 /* build cache for off array entries formed */ 6085 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 6086 6087 b->donotstash = PETSC_FALSE; 6088 b->colmap = NULL; 6089 b->garray = NULL; 6090 b->roworiented = PETSC_TRUE; 6091 6092 /* stuff used for matrix vector multiply */ 6093 b->lvec = NULL; 6094 b->Mvctx = NULL; 6095 6096 /* stuff for MatGetRow() */ 6097 b->rowindices = NULL; 6098 b->rowvalues = NULL; 6099 b->getrowactive = PETSC_FALSE; 6100 6101 /* flexible pointer used in CUSPARSE classes */ 6102 b->spptr = NULL; 6103 6104 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 6105 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 6106 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 6107 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 6108 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 6109 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 6110 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 6111 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 6112 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 6113 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr); 6114 #if defined(PETSC_HAVE_CUDA) 6115 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr); 6116 #endif 6117 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6118 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr); 6119 #endif 6120 #if defined(PETSC_HAVE_MKL_SPARSE) 6121 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 6122 #endif 6123 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 6124 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr); 6125 ierr =
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 6126 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);CHKERRQ(ierr); 6127 #if defined(PETSC_HAVE_ELEMENTAL) 6128 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 6129 #endif 6130 #if defined(PETSC_HAVE_SCALAPACK) 6131 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr); 6132 #endif 6133 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 6134 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 6135 #if defined(PETSC_HAVE_HYPRE) 6136 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 6137 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr); 6138 #endif 6139 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr); 6140 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr); 6141 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 6142 PetscFunctionReturn(0); 6143 } 6144 6145 /*@C 6146 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 6147 and "off-diagonal" part of the matrix in CSR format. 6148 6149 Collective 6150 6151 Input Parameters: 6152 + comm - MPI communicator 6153 . m - number of local rows (Cannot be PETSC_DECIDE) 6154 . n - This value should be the same as the local size used in creating the 6155 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 6156 calculated if N is given) For square matrices n is almost always m. 6157 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 6158 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 6159 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6160 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6161 . a - matrix values 6162 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6163 . oj - column indices, which must be global, representing global columns in the MPIAIJ matrix 6164 - oa - matrix values 6165 6166 Output Parameter: 6167 . mat - the matrix 6168 6169 Level: advanced 6170 6171 Notes: 6172 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6173 must free the arrays once the matrix has been destroyed and not before. 6174 6175 The i and j indices are 0 based 6176 6177 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6178 6179 This sets local rows and cannot be used to set off-processor values. 6180 6181 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6182 legacy application natively assembles into exactly this split format. 
The code to do so is nontrivial and does 6183 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6184 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6185 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6186 communication if it is known that only local entries will be set. 6187 6188 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 6189 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 6190 @*/ 6191 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 6192 { 6193 PetscErrorCode ierr; 6194 Mat_MPIAIJ *maij; 6195 6196 PetscFunctionBegin; 6197 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6198 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 6199 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 6200 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 6201 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 6202 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 6203 maij = (Mat_MPIAIJ*) (*mat)->data; 6204 6205 (*mat)->preallocated = PETSC_TRUE; 6206 6207 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 6208 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 6209 6210 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 6211 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 6212 6213 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 6214 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6215 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6216 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 6217 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6218 PetscFunctionReturn(0); 6219 } 6220 6221 /* 6222 Special version for direct calls from Fortran 6223 */ 6224 #include <petsc/private/fortranimpl.h> 6225 6226 /* Change these macros so can be used in void function */ 6227 #undef CHKERRQ 6228 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 6229 #undef SETERRQ2 6230 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 6231 #undef SETERRQ3 6232 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 6233 #undef SETERRQ 6234 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 6235 6236 #if defined(PETSC_HAVE_FORTRAN_CAPS) 6237 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 6238 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 6239 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 6240 #else 6241 #endif 6242 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 6243 { 6244 Mat mat = *mmat; 6245 PetscInt m = *mm, n = *mn; 6246 InsertMode addv = *maddv; 6247 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 6248 PetscScalar value; 6249 PetscErrorCode ierr; 6250 6251 MatCheckPreallocated(mat,1); 6252 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 6253 else if (mat->insertmode != addv) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 6254 { 6255 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 6256 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 6257 PetscBool roworiented = aij->roworiented; 6258 6259 /* Some Variables required in the macro */ 6260 Mat A = aij->A; 6261 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 6262 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 6263 MatScalar *aa = a->a; 6264 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 6265 Mat B = aij->B; 6266 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 6267 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 6268 MatScalar *ba = b->a; 6269 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 6270 * cannot use "#if defined" inside a macro. */ 6271 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 6272 6273 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 6274 PetscInt nonew = a->nonew; 6275 MatScalar *ap1,*ap2; 6276 6277 PetscFunctionBegin; 6278 for (i=0; i<m; i++) { 6279 if (im[i] < 0) continue; 6280 if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 6281 if (im[i] >= rstart && im[i] < rend) { 6282 row = im[i] - rstart; 6283 lastcol1 = -1; 6284 rp1 = aj + ai[row]; 6285 ap1 = aa + ai[row]; 6286 rmax1 = aimax[row]; 6287 nrow1 = ailen[row]; 6288 low1 = 0; 6289 high1 = nrow1; 6290 lastcol2 = -1; 6291 rp2 = bj + bi[row]; 6292 ap2 = ba + bi[row]; 6293 rmax2 = bimax[row]; 6294 nrow2 = bilen[row]; 6295 low2 = 0; 6296 high2 = nrow2; 6297 6298 for (j=0; j<n; j++) { 6299 if (roworiented) value = v[i*n+j]; 6300 else value = v[i+j*m]; 6301 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 6302 if (in[j] >= cstart && in[j] < cend) { 6303 col = in[j] - cstart; 6304 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 6305 #if defined(PETSC_HAVE_DEVICE) 6306 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 6307 #endif 6308 } else if (in[j] < 0) continue; 6309 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 6310 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */ 6311 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 6312 } else { 6313 if (mat->was_assembled) { 6314 if (!aij->colmap) { 6315 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 6316 } 6317 #if defined(PETSC_USE_CTABLE) 6318 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 6319 col--; 6320 #else 6321 col = aij->colmap[in[j]] - 1; 6322 #endif 6323 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 6324 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 6325 col = in[j]; 6326 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 6327 B = aij->B; 6328 b = (Mat_SeqAIJ*)B->data; 6329 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 6330 rp2 = bj + bi[row]; 6331 ap2 = ba + bi[row]; 6332 rmax2 = bimax[row]; 6333 nrow2 = bilen[row]; 6334 low2 = 0; 6335 high2 = nrow2; 6336 bm = aij->B->rmap->n; 6337 ba = b->a; 6338 inserted = PETSC_FALSE; 6339 } 6340 } else col = in[j]; 6341 
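/* Here col is normally either a local column index into the compact off-diagonal block B (previously assembled matrix, global column found in aij->colmap) or a global column index (first assembly, or after MatDisAssemble_MPIAIJ() above); the B-specific macro below inserts the value accordingly */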
MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 6342 #if defined(PETSC_HAVE_DEVICE) 6343 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 6344 #endif 6345 } 6346 } 6347 } else if (!aij->donotstash) { 6348 if (roworiented) { 6349 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6350 } else { 6351 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6352 } 6353 } 6354 } 6355 } 6356 PetscFunctionReturnVoid(); 6357 } 6358 6359 typedef struct { 6360 Mat *mp; /* intermediate products */ 6361 PetscBool *mptmp; /* is the intermediate product temporary ? */ 6362 PetscInt cp; /* number of intermediate products */ 6363 6364 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6365 PetscInt *startsj_s,*startsj_r; 6366 PetscScalar *bufa; 6367 Mat P_oth; 6368 6369 /* may take advantage of merging product->B */ 6370 Mat Bloc; /* B-local by merging diag and off-diag */ 6371 6372 /* cusparse does not have support to split between symbolic and numeric phases. 6373 When api_user is true, we don't need to update the numerical values 6374 of the temporary storage */ 6375 PetscBool reusesym; 6376 6377 /* support for COO values insertion */ 6378 PetscScalar *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6379 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6380 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6381 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */ 6382 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6383 PetscMemType mtype; 6384 6385 /* customization */ 6386 PetscBool abmerge; 6387 PetscBool P_oth_bind; 6388 } MatMatMPIAIJBACKEND; 6389 6390 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6391 { 6392 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data; 6393 PetscInt i; 6394 PetscErrorCode ierr; 6395 6396 PetscFunctionBegin; 6397 ierr = PetscFree2(mmdata->startsj_s,mmdata->startsj_r);CHKERRQ(ierr); 6398 ierr = PetscFree(mmdata->bufa);CHKERRQ(ierr); 6399 ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v);CHKERRQ(ierr); 6400 ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w);CHKERRQ(ierr); 6401 ierr = MatDestroy(&mmdata->P_oth);CHKERRQ(ierr); 6402 ierr = MatDestroy(&mmdata->Bloc);CHKERRQ(ierr); 6403 ierr = PetscSFDestroy(&mmdata->sf);CHKERRQ(ierr); 6404 for (i = 0; i < mmdata->cp; i++) { 6405 ierr = MatDestroy(&mmdata->mp[i]);CHKERRQ(ierr); 6406 } 6407 ierr = PetscFree2(mmdata->mp,mmdata->mptmp);CHKERRQ(ierr); 6408 ierr = PetscFree(mmdata->own[0]);CHKERRQ(ierr); 6409 ierr = PetscFree(mmdata->own);CHKERRQ(ierr); 6410 ierr = PetscFree(mmdata->off[0]);CHKERRQ(ierr); 6411 ierr = PetscFree(mmdata->off);CHKERRQ(ierr); 6412 ierr = PetscFree(mmdata);CHKERRQ(ierr); 6413 PetscFunctionReturn(0); 6414 } 6415 6416 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 6417 { 6418 PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]); 6419 PetscErrorCode ierr; 6420 6421 PetscFunctionBegin; 6422 ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f);CHKERRQ(ierr); 6423 if (f) { 6424 ierr = (*f)(A,n,idx,v);CHKERRQ(ierr); 6425 } else { 6426 const PetscScalar *vv; 6427 6428 ierr = 
MatSeqAIJGetArrayRead(A,&vv);CHKERRQ(ierr); 6429 if (n && idx) { 6430 PetscScalar *w = v; 6431 const PetscInt *oi = idx; 6432 PetscInt j; 6433 6434 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 6435 } else { 6436 ierr = PetscArraycpy(v,vv,n);CHKERRQ(ierr); 6437 } 6438 ierr = MatSeqAIJRestoreArrayRead(A,&vv);CHKERRQ(ierr); 6439 } 6440 PetscFunctionReturn(0); 6441 } 6442 6443 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 6444 { 6445 MatMatMPIAIJBACKEND *mmdata; 6446 PetscInt i,n_d,n_o; 6447 PetscErrorCode ierr; 6448 6449 PetscFunctionBegin; 6450 MatCheckProduct(C,1); 6451 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 6452 mmdata = (MatMatMPIAIJBACKEND*)C->product->data; 6453 if (!mmdata->reusesym) { /* update temporary matrices */ 6454 if (mmdata->P_oth) { 6455 ierr = MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 6456 } 6457 if (mmdata->Bloc) { 6458 ierr = MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc);CHKERRQ(ierr); 6459 } 6460 } 6461 mmdata->reusesym = PETSC_FALSE; 6462 6463 for (i = 0; i < mmdata->cp; i++) { 6464 if (!mmdata->mp[i]->ops->productnumeric) SETERRQ1(PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]); 6465 ierr = (*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]);CHKERRQ(ierr); 6466 } 6467 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 6468 PetscInt noff = mmdata->off[i+1] - mmdata->off[i]; 6469 6470 if (mmdata->mptmp[i]) continue; 6471 if (noff) { 6472 PetscInt nown = mmdata->own[i+1] - mmdata->own[i]; 6473 6474 ierr = MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o);CHKERRQ(ierr); 6475 ierr = MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d);CHKERRQ(ierr); 6476 n_o += noff; 6477 n_d += nown; 6478 } else { 6479 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data; 6480 6481 ierr = MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d);CHKERRQ(ierr); 6482 n_d += mm->nz; 6483 } 6484 } 6485 if (mmdata->hasoffproc) { /* offprocess insertion */ 6486 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr); 6487 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr); 6488 } 6489 ierr = MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES);CHKERRQ(ierr); 6490 PetscFunctionReturn(0); 6491 } 6492 6493 /* Support for Pt * A, A * P, or Pt * A * P */ 6494 #define MAX_NUMBER_INTERMEDIATE 4 6495 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 6496 { 6497 Mat_Product *product = C->product; 6498 Mat A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 6499 Mat_MPIAIJ *a,*p; 6500 MatMatMPIAIJBACKEND *mmdata; 6501 ISLocalToGlobalMapping P_oth_l2g = NULL; 6502 IS glob = NULL; 6503 const char *prefix; 6504 char pprefix[256]; 6505 const PetscInt *globidx,*P_oth_idx; 6506 PetscInt i,j,cp,m,n,M,N,ncoo,ncoo_d,ncoo_o,ncoo_oown,*coo_i,*coo_j; 6507 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. 
*/ 6508 /* type-0: consecutive, start from 0; type-1: consecutive with */ 6509 /* a base offset; type-2: sparse with a local to global map table */ 6510 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 6511 6512 MatProductType ptype; 6513 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk; 6514 PetscMPIInt size; 6515 PetscErrorCode ierr; 6516 6517 PetscFunctionBegin; 6518 MatCheckProduct(C,1); 6519 if (product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 6520 ptype = product->type; 6521 if (product->A->symmetric && ptype == MATPRODUCT_AtB) { 6522 ptype = MATPRODUCT_AB; 6523 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 6524 } 6525 switch (ptype) { 6526 case MATPRODUCT_AB: 6527 A = product->A; 6528 P = product->B; 6529 m = A->rmap->n; 6530 n = P->cmap->n; 6531 M = A->rmap->N; 6532 N = P->cmap->N; 6533 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 6534 break; 6535 case MATPRODUCT_AtB: 6536 P = product->A; 6537 A = product->B; 6538 m = P->cmap->n; 6539 n = A->cmap->n; 6540 M = P->cmap->N; 6541 N = A->cmap->N; 6542 hasoffproc = PETSC_TRUE; 6543 break; 6544 case MATPRODUCT_PtAP: 6545 A = product->A; 6546 P = product->B; 6547 m = P->cmap->n; 6548 n = P->cmap->n; 6549 M = P->cmap->N; 6550 N = P->cmap->N; 6551 hasoffproc = PETSC_TRUE; 6552 break; 6553 default: 6554 SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 6555 } 6556 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)C),&size);CHKERRMPI(ierr); 6557 if (size == 1) hasoffproc = PETSC_FALSE; 6558 6559 /* defaults */ 6560 for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) { 6561 mp[i] = NULL; 6562 mptmp[i] = PETSC_FALSE; 6563 rmapt[i] = -1; 6564 cmapt[i] = -1; 6565 rmapa[i] = NULL; 6566 cmapa[i] = NULL; 6567 } 6568 6569 /* customization */ 6570 ierr = PetscNew(&mmdata);CHKERRQ(ierr); 6571 mmdata->reusesym = product->api_user; 6572 if (ptype == MATPRODUCT_AB) { 6573 if (product->api_user) { 6574 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr); 6575 ierr = PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr); 6576 ierr = PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6577 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6578 } else { 6579 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr); 6580 ierr = PetscOptionsBool("-matproduct_ab_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr); 6581 ierr = PetscOptionsBool("-matproduct_ab_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6582 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6583 } 6584 } else if (ptype == MATPRODUCT_PtAP) { 6585 if (product->api_user) { 6586 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr); 6587 ierr = PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6588 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6589 } else { 6590 ierr = 
PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr); 6591 ierr = PetscOptionsBool("-matproduct_ptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr); 6592 ierr = PetscOptionsEnd();CHKERRQ(ierr); 6593 } 6594 } 6595 a = (Mat_MPIAIJ*)A->data; 6596 p = (Mat_MPIAIJ*)P->data; 6597 ierr = MatSetSizes(C,m,n,M,N);CHKERRQ(ierr); 6598 ierr = PetscLayoutSetUp(C->rmap);CHKERRQ(ierr); 6599 ierr = PetscLayoutSetUp(C->cmap);CHKERRQ(ierr); 6600 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 6601 ierr = MatGetOptionsPrefix(C,&prefix);CHKERRQ(ierr); 6602 6603 cp = 0; 6604 switch (ptype) { 6605 case MATPRODUCT_AB: /* A * P */ 6606 ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 6607 6608 /* A_diag * P_local (merged or not) */ 6609 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 6610 /* P is product->B */ 6611 ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 6612 ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6613 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6614 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6615 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6616 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6617 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6618 mp[cp]->product->api_user = product->api_user; 6619 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6620 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6621 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6622 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6623 rmapt[cp] = 1; 6624 cmapt[cp] = 2; 6625 cmapa[cp] = globidx; 6626 mptmp[cp] = PETSC_FALSE; 6627 cp++; 6628 } else { /* A_diag * P_diag and A_diag * P_off */ 6629 ierr = MatProductCreate(a->A,p->A,NULL,&mp[cp]);CHKERRQ(ierr); 6630 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6631 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6632 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6633 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6634 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6635 mp[cp]->product->api_user = product->api_user; 6636 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6637 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6638 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6639 rmapt[cp] = 1; 6640 cmapt[cp] = 1; 6641 mptmp[cp] = PETSC_FALSE; 6642 cp++; 6643 ierr = MatProductCreate(a->A,p->B,NULL,&mp[cp]);CHKERRQ(ierr); 6644 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6645 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6646 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6647 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6648 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6649 mp[cp]->product->api_user = product->api_user; 6650 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 
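/* This second intermediate product is A_diag * P_off; its rows coincide with the local rows of C (rmapt type-1 below), while its columns are mapped back to global columns of C through p->garray (cmapt type-2) */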
6651 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6652 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6653 rmapt[cp] = 1; 6654 cmapt[cp] = 2; 6655 cmapa[cp] = p->garray; 6656 mptmp[cp] = PETSC_FALSE; 6657 cp++; 6658 } 6659 6660 /* A_off * P_other */ 6661 if (mmdata->P_oth) { 6662 ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); /* make P_oth use local col ids */ 6663 ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 6664 ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr); 6665 ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr); 6666 ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr); 6667 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6668 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6669 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6670 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6671 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6672 mp[cp]->product->api_user = product->api_user; 6673 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6674 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6675 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6676 rmapt[cp] = 1; 6677 cmapt[cp] = 2; 6678 cmapa[cp] = P_oth_idx; 6679 mptmp[cp] = PETSC_FALSE; 6680 cp++; 6681 } 6682 break; 6683 6684 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 6685 /* A is product->B */ 6686 ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 6687 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 6688 ierr = MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6689 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6690 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6691 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6692 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6693 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6694 mp[cp]->product->api_user = product->api_user; 6695 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6696 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6697 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6698 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6699 rmapt[cp] = 2; 6700 rmapa[cp] = globidx; 6701 cmapt[cp] = 2; 6702 cmapa[cp] = globidx; 6703 mptmp[cp] = PETSC_FALSE; 6704 cp++; 6705 } else { 6706 ierr = MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6707 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6708 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6709 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6710 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6711 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6712 mp[cp]->product->api_user = product->api_user; 6713 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6714 if (!mp[cp]->ops->productsymbolic) 
SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6715 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6716 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6717 rmapt[cp] = 1; 6718 cmapt[cp] = 2; 6719 cmapa[cp] = globidx; 6720 mptmp[cp] = PETSC_FALSE; 6721 cp++; 6722 ierr = MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6723 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6724 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6725 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6726 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6727 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6728 mp[cp]->product->api_user = product->api_user; 6729 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6730 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6731 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6732 rmapt[cp] = 2; 6733 rmapa[cp] = p->garray; 6734 cmapt[cp] = 2; 6735 cmapa[cp] = globidx; 6736 mptmp[cp] = PETSC_FALSE; 6737 cp++; 6738 } 6739 break; 6740 case MATPRODUCT_PtAP: 6741 ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr); 6742 /* P is product->B */ 6743 ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr); 6744 ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr); 6745 ierr = MatProductSetType(mp[cp],MATPRODUCT_PtAP);CHKERRQ(ierr); 6746 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6747 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6748 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6749 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6750 mp[cp]->product->api_user = product->api_user; 6751 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6752 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6753 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6754 ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr); 6755 rmapt[cp] = 2; 6756 rmapa[cp] = globidx; 6757 cmapt[cp] = 2; 6758 cmapa[cp] = globidx; 6759 mptmp[cp] = PETSC_FALSE; 6760 cp++; 6761 if (mmdata->P_oth) { 6762 ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); 6763 ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 6764 ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr); 6765 ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr); 6766 ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr); 6767 ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr); 6768 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6769 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6770 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6771 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6772 mp[cp]->product->api_user = product->api_user; 6773 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6774 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing 
symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6775 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6776 mptmp[cp] = PETSC_TRUE; 6777 cp++; 6778 ierr = MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]);CHKERRQ(ierr); 6779 ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr); 6780 ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr); 6781 ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr); 6782 ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr); 6783 ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr); 6784 mp[cp]->product->api_user = product->api_user; 6785 ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr); 6786 if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6787 ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr); 6788 rmapt[cp] = 2; 6789 rmapa[cp] = globidx; 6790 cmapt[cp] = 2; 6791 cmapa[cp] = P_oth_idx; 6792 mptmp[cp] = PETSC_FALSE; 6793 cp++; 6794 } 6795 break; 6796 default: 6797 SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 6798 } 6799 /* sanity check */ 6800 if (size > 1) for (i = 0; i < cp; i++) if (rmapt[i] == 2 && !hasoffproc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %D",i); 6801 6802 ierr = PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp);CHKERRQ(ierr); 6803 for (i = 0; i < cp; i++) { 6804 mmdata->mp[i] = mp[i]; 6805 mmdata->mptmp[i] = mptmp[i]; 6806 } 6807 mmdata->cp = cp; 6808 C->product->data = mmdata; 6809 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 6810 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 6811 6812 /* memory type */ 6813 mmdata->mtype = PETSC_MEMTYPE_HOST; 6814 ierr = PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");CHKERRQ(ierr); 6815 ierr = PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"");CHKERRQ(ierr); 6816 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 6817 // enable the line below MatSeqAIJCopySubArray_SeqAIJKokkos is implemented 6818 //else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_DEVICE; 6819 6820 /* prepare coo coordinates for values insertion */ 6821 6822 /* count total nonzeros of those intermediate seqaij Mats 6823 ncoo_d: # of nonzeros of matrices that do not have offproc entries 6824 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 6825 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 6826 */ 6827 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 6828 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 6829 if (mptmp[cp]) continue; 6830 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 6831 const PetscInt *rmap = rmapa[cp]; 6832 const PetscInt mr = mp[cp]->rmap->n; 6833 const PetscInt rs = C->rmap->rstart; 6834 const PetscInt re = C->rmap->rend; 6835 const PetscInt *ii = mm->i; 6836 for (i = 0; i < mr; i++) { 6837 const PetscInt gr = rmap[i]; 6838 const PetscInt nz = ii[i+1] - ii[i]; 6839 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 6840 else ncoo_oown += nz; /* this row is local */ 6841 } 6842 } else ncoo_d += mm->nz; 6843 } 6844 6845 /* 6846 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 
6847 6848 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 6849 6850 off[0] points to a big index array, which is shared by off[1,2,...]. Similarily, for own[0]. 6851 6852 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 6853 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 6854 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 6855 6856 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 6857 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaing part stores i of nonzeros I will receive. 6858 */ 6859 ierr = PetscCalloc1(mmdata->cp+1,&mmdata->off);CHKERRQ(ierr); /* +1 to make a csr-like data structure */ 6860 ierr = PetscCalloc1(mmdata->cp+1,&mmdata->own);CHKERRQ(ierr); 6861 6862 /* gather (i,j) of nonzeros inserted by remote procs */ 6863 if (hasoffproc) { 6864 PetscSF msf; 6865 PetscInt ncoo2,*coo_i2,*coo_j2; 6866 6867 ierr = PetscMalloc1(ncoo_o,&mmdata->off[0]);CHKERRQ(ierr); 6868 ierr = PetscMalloc1(ncoo_oown,&mmdata->own[0]);CHKERRQ(ierr); 6869 ierr = PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j);CHKERRQ(ierr); /* to collect (i,j) of entries to be sent to others */ 6870 6871 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 6872 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 6873 PetscInt *idxoff = mmdata->off[cp]; 6874 PetscInt *idxown = mmdata->own[cp]; 6875 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 6876 const PetscInt *rmap = rmapa[cp]; 6877 const PetscInt *cmap = cmapa[cp]; 6878 const PetscInt *ii = mm->i; 6879 PetscInt *coi = coo_i + ncoo_o; 6880 PetscInt *coj = coo_j + ncoo_o; 6881 const PetscInt mr = mp[cp]->rmap->n; 6882 const PetscInt rs = C->rmap->rstart; 6883 const PetscInt re = C->rmap->rend; 6884 const PetscInt cs = C->cmap->rstart; 6885 for (i = 0; i < mr; i++) { 6886 const PetscInt *jj = mm->j + ii[i]; 6887 const PetscInt gr = rmap[i]; 6888 const PetscInt nz = ii[i+1] - ii[i]; 6889 if (gr < rs || gr >= re) { /* this is an offproc row */ 6890 for (j = ii[i]; j < ii[i+1]; j++) { 6891 *coi++ = gr; 6892 *idxoff++ = j; 6893 } 6894 if (!cmapt[cp]) { /* already global */ 6895 for (j = 0; j < nz; j++) *coj++ = jj[j]; 6896 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 6897 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 6898 } else { /* offdiag */ 6899 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 6900 } 6901 ncoo_o += nz; 6902 } else { /* this is a local row */ 6903 for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j; 6904 } 6905 } 6906 } 6907 mmdata->off[cp + 1] = idxoff; 6908 mmdata->own[cp + 1] = idxown; 6909 } 6910 6911 ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr); 6912 ierr = PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i);CHKERRQ(ierr); 6913 ierr = PetscSFGetMultiSF(mmdata->sf,&msf);CHKERRQ(ierr); 6914 ierr = PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL);CHKERRQ(ierr); 6915 ncoo = ncoo_d + ncoo_oown + ncoo2; 6916 ierr = PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2);CHKERRQ(ierr); 6917 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); /* put (i,j) of remote nonzeros at back */ 6918 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 6919 ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + 
ncoo_oown);CHKERRQ(ierr); 6920 ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); 6921 ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr); 6922 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 6923 ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w);CHKERRQ(ierr); 6924 coo_i = coo_i2; 6925 coo_j = coo_j2; 6926 } else { /* no offproc values insertion */ 6927 ncoo = ncoo_d; 6928 ierr = PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j);CHKERRQ(ierr); 6929 6930 ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr); 6931 ierr = PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER);CHKERRQ(ierr); 6932 ierr = PetscSFSetUp(mmdata->sf);CHKERRQ(ierr); 6933 } 6934 mmdata->hasoffproc = hasoffproc; 6935 6936 /* gather (i,j) of nonzeros inserted locally */ 6937 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 6938 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 6939 PetscInt *coi = coo_i + ncoo_d; 6940 PetscInt *coj = coo_j + ncoo_d; 6941 const PetscInt *jj = mm->j; 6942 const PetscInt *ii = mm->i; 6943 const PetscInt *cmap = cmapa[cp]; 6944 const PetscInt *rmap = rmapa[cp]; 6945 const PetscInt mr = mp[cp]->rmap->n; 6946 const PetscInt rs = C->rmap->rstart; 6947 const PetscInt re = C->rmap->rend; 6948 const PetscInt cs = C->cmap->rstart; 6949 6950 if (mptmp[cp]) continue; 6951 if (rmapt[cp] == 1) { /* consecutive rows */ 6952 /* fill coo_i */ 6953 for (i = 0; i < mr; i++) { 6954 const PetscInt gr = i + rs; 6955 for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr; 6956 } 6957 /* fill coo_j */ 6958 if (!cmapt[cp]) { /* type-0, already global */ 6959 ierr = PetscArraycpy(coj,jj,mm->nz);CHKERRQ(ierr); 6960 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 6961 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 6962 } else { /* type-2, local to global for sparse columns */ 6963 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 6964 } 6965 ncoo_d += mm->nz; 6966 } else if (rmapt[cp] == 2) { /* sparse rows */ 6967 for (i = 0; i < mr; i++) { 6968 const PetscInt *jj = mm->j + ii[i]; 6969 const PetscInt gr = rmap[i]; 6970 const PetscInt nz = ii[i+1] - ii[i]; 6971 if (gr >= rs && gr < re) { /* local rows */ 6972 for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr; 6973 if (!cmapt[cp]) { /* type-0, already global */ 6974 for (j = 0; j < nz; j++) *coj++ = jj[j]; 6975 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 6976 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 6977 } else { /* type-2, local to global for sparse columns */ 6978 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 6979 } 6980 ncoo_d += nz; 6981 } 6982 } 6983 } 6984 } 6985 if (glob) { 6986 ierr = ISRestoreIndices(glob,&globidx);CHKERRQ(ierr); 6987 } 6988 ierr = ISDestroy(&glob);CHKERRQ(ierr); 6989 if (P_oth_l2g) { 6990 ierr = ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr); 6991 } 6992 ierr = ISLocalToGlobalMappingDestroy(&P_oth_l2g);CHKERRQ(ierr); 6993 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 6994 ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v);CHKERRQ(ierr); 6995 6996 /* preallocate with COO data */ 6997 ierr = MatSetPreallocationCOO(C,ncoo,coo_i,coo_j);CHKERRQ(ierr); 6998 ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr); 6999 PetscFunctionReturn(0); 7000 } 7001 7002 PetscErrorCode 
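/* Selects the COO-based backend symbolic routine when the product operands have matching types and (with device support) are not bound to the CPU, unless one of the *_backend_cpu options below requests the CPU path; otherwise falls back to MatProductSetFromOptions_MPIAIJ() */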
MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7003 { 7004 Mat_Product *product = mat->product; 7005 PetscErrorCode ierr; 7006 #if defined(PETSC_HAVE_DEVICE) 7007 PetscBool match = PETSC_FALSE; 7008 PetscBool usecpu = PETSC_FALSE; 7009 #else 7010 PetscBool match = PETSC_TRUE; 7011 #endif 7012 7013 PetscFunctionBegin; 7014 MatCheckProduct(mat,1); 7015 #if defined(PETSC_HAVE_DEVICE) 7016 if (!product->A->boundtocpu && !product->B->boundtocpu) { 7017 ierr = PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match);CHKERRQ(ierr); 7018 } 7019 if (match) { /* we can always fallback to the CPU if requested */ 7020 switch (product->type) { 7021 case MATPRODUCT_AB: 7022 if (product->api_user) { 7023 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");CHKERRQ(ierr); 7024 ierr = PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7025 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7026 } else { 7027 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr); 7028 ierr = PetscOptionsBool("-matproduct_ab_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7029 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7030 } 7031 break; 7032 case MATPRODUCT_AtB: 7033 if (product->api_user) { 7034 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr); 7035 ierr = PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7036 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7037 } else { 7038 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr); 7039 ierr = PetscOptionsBool("-matproduct_atb_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7040 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7041 } 7042 break; 7043 case MATPRODUCT_PtAP: 7044 if (product->api_user) { 7045 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");CHKERRQ(ierr); 7046 ierr = PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7047 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7048 } else { 7049 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr); 7050 ierr = PetscOptionsBool("-matproduct_ptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr); 7051 ierr = PetscOptionsEnd();CHKERRQ(ierr); 7052 } 7053 break; 7054 default: 7055 break; 7056 } 7057 match = (PetscBool)!usecpu; 7058 } 7059 #endif 7060 if (match) { 7061 switch (product->type) { 7062 case MATPRODUCT_AB: 7063 case MATPRODUCT_AtB: 7064 case MATPRODUCT_PtAP: 7065 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7066 break; 7067 default: 7068 break; 7069 } 7070 } 7071 /* fallback to MPIAIJ ops */ 7072 if (!mat->ops->productsymbolic) { 7073 ierr = MatProductSetFromOptions_MPIAIJ(mat);CHKERRQ(ierr); 7074 } 7075 PetscFunctionReturn(0); 7076 } 7077